Example #1
def get_pattern_data(search_param):

    twitter = Twitter(language='en')

    for tweet in twitter.search(search_param, cached=True):
        print(plaintext(tweet.text).encode('ascii', 'ignore').decode('utf-8'))

    g = Graph()
    for i in range(10):
        for result in twitter.search(search_param, start=i + 1, count=50):
            s = result.text.lower()
            s = plaintext(s)
            s = parsetree(s)
            p = '{NP} (VP) ' + search_param + ' {NP}'
            for m in search(p, s):
                x = m.group(1).string  # NP left
                y = m.group(2).string  # NP right
                if x not in g:
                    g.add_node(x)
                if y not in g:
                    g.add_node(y)
                g.add_edge(g[x], g[y], stroke=(0, 0, 0, 0.75))  # R,G,B,A

    #if len(g)>0:
    #   g = g.split()[0] # Largest subgraph.

    for n in g.sorted()[:40]:  # Sort by Node.weight.
        n.fill = (0, 0.5, 1, 0.75 * n.weight)

    g.export('data', directed=False, weighted=0.6)
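A minimal usage sketch (not part of the original snippet): the function above relies on several pattern modules that the excerpt does not import. Assuming the standard pattern library layout, the missing imports and a hypothetical call could look like this; the query string 'is better than' is only an illustrative choice.

# Hedged sketch: the imports the snippet above depends on, plus an example call.
from pattern.web import Twitter, plaintext     # Twitter search + HTML stripping
from pattern.en import parsetree               # part-of-speech parsing
from pattern.search import search              # pattern matching on parse trees
from pattern.graph import Graph                # graph with HTML/canvas export

get_pattern_data('is better than')  # builds the graph and exports it to a 'data' folder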
Example #3
def obtenerTweets(request):
    twitterEn = Twitter(language='en')
    twitterEs = Twitter(language='es')
    idJuego = request.GET.get("id")
    juego = Juego.objects.get(id=idJuego)
    tweets = []
    for tweet in twitterEs.search(juego.titulo, cached=False):
        tweets.append(tweet.text)
    for tweet in twitterEn.search(juego.titulo, cached=False):
        tweets.append(tweet.text)
    return render(request, 'obtenerTweets.html', {'tweets': tweets})
Example #4
def search_with_language_in_region(lang, capital_city, search_terms, file_name):
    """
    Does a twitter search in the specified language in the area of a given capital city
    e.g. search_with_language_in_region('en', 'Paris', '#yoloswag', 'Paris_yoloswag')
    
    Inputs: expects strings for everything.
        lang: the language you want to search in [string], e.g. 'en'
        capital_city: the city you want to search around, found 
        through pattern's geocode function,  e.g. 'Paris'
        search_terms: duh. e.g. ['crimea','putin']
        file_name: the file name you want to save the tweets as, will come out as e.g. nealiscool.pickle
    
    Outputs: a pickled dictionary of the tweets, saved on disk under the given file_name.
    The keys of the dictionary are the unique tweet IDs.
    """
    t = Twitter(language=lang)
    tweets_gathered = {}
    i = None
    for j in range(2):
        for tweet in t.search(search_terms, start=i, count=10,geo=geocode(capital_city)[:2]):
            print tweet.text
            print
            i = tweet.id
            tweets_gathered[tweet.id] = tweet.text
    f = open(file_name,'w')   
    pickle.dump(tweets_gathered,f)
    f.close()
Example #5
def get_tweets():
	'''This function parses Twitter to find tweets about a user-defined political figure
	'''
	
	print 'This program measures the average sentiment of the populace towards a political candidate through the analysis of recent tweets\n' #introduce program to user
	print 'Enter the name of a candidate:'
	x = raw_input('> ') #receives name of candidate to search for
	print 'Enter number of tweets to search (max = 100)'
	twtNumstr = raw_input('> ') #receive number of tweets to search for
	twtNum = int(twtNumstr) #convert to int to use in search

	if twtNum <= 1: #check if an invalid number was entered, and if so, correct it to either the minimum or maximum allowed
		twtNum = 2
		print 'Invalid number entered. The minimum of 2 tweets will be used.'
	elif twtNum > 100:
		twtNum = 100
		print 'Invalid number entered. The maximum of 100 tweets will be used.'

	t = Twitter() #search for tweets containing user-defined key word
	i = 0
	twts = []
	for j in range(1):
		for tweet in t.search(x, start=i, count=twtNum):
			twts.append(tweet.text)

	return twts
Example #6
def crawl(topic, N=100, Nbatch=25):
    t = Twitter()  # language='en','id'
    M = N // Nbatch  #integer
    i, Tweets, keepCrawling = None, [], True
    for j in tqdm(range(M)):
        if keepCrawling:
            for tweet in t.search(topic, start=i, count=Nbatch):
                try:
                    Tweets.append(tweet)
                    i = tweet.id
                except:
                    print("Twitter Limit reached")
                    keepCrawling = False  # Second Break (outer loop)
                    break
        else:
            break
    print('Making sure we get the full tweets, please wait ...')
    for i, tweet in enumerate(tqdm(Tweets)):
        try:
            webPage = URL(tweet.url).download()
            soup = bs(webPage, 'html.parser')
            full_tweet = soup.find_all(
                'p',
                class_='TweetTextSize')[0]  #modify this to get all replies
            full_tweet = bs(str(full_tweet), 'html.parser').text
            Tweets[i]['fullTxt'] = full_tweet
        except:
            Tweets[i]['fullTxt'] = tweet.text  # fall back to the (possibly truncated) search result text
    print('Done!... Total terdapat {0} tweet'.format(len(Tweets)))
    return Tweets
Example #7
def search():

    query = str(raw_input("enter search query: "))
    t = Twitter()
    # i = None
    chances = 0
    fileSave.write(query + "\n")

    allChances = 0
    for tweet in t.search(query, start=None, count=5):

        print tweet.text

        # Calc tweet sentiment
        sent_int = sent(tweet.text)
        sent_str = str(sent_int)
        # print sent_str

        # Calc author's follower count
        follows_int = findFollows(tweet.author)
        follows_str = str(follows_int)
        # print follows_str

        # Calc chances; make cumulative
        chances = follows_int * sent_int
        print str(chances) + "\n"

        # File save
        save = sent_str + "\n" + follows_str + "\n \n"
        fileSave.write(save)

        allChances = allChances + chances

        print "OVERALL: " + str(allChances)
Example #8
def gettweets(searchterms):
    tweetlist = []
    from pattern.web import Twitter, plaintext
    twitter = Twitter(language='en')
    for tweet in twitter.search(searchterms, cached=False):
        tweetlist.append(plaintext(tweet.text))
    return tweetlist
Example #10
    def getTweetsByCoord(self, term, lat, lng):
        
        twitter = Twitter(language='en')
        tweets = []
        for tweet in twitter.search('traffic', geo=(lat, lng)):
            tweets.append(tweet.text)

        return tweets
Example #11
def twitter_search():
    t = Twitter(language='es')
    i = None
    for j in range(3):  # For pagination
        for r in t.search(query="#DonaldTrump", start=i, count=10):
            print(r.id, r.text, r.date)
        i = r.id
        print("----------------@@@@@@-------------")
Example #12
    def getTweetsByCoord(self, term, lat, lng):

        twitter = Twitter(language='en')
        tweets = []
        for tweet in twitter.search('traffic', geo=(lat, lng)):
            tweets.append(tweet.text)

        return tweets
Example #13
def search(text):
    list = []

    twitter = Twitter(language='en')
    for tweet in twitter.search(text, count=30, cached=False):
        list.append(tweet.text)

    return list
Example #14
 def search_tweets(self, celeb):
     '''
     Pull tweets from the Twitter API that mention 
     the given celebrity
     '''
     twitter_api = Twitter(language='en')
     #TODO: up the count for the final project
     return twitter_api.search(celeb, count=3000)
Example #15
def busco_en_twitter(cadena):

        t = Twitter()
        i = None
        for j in range(3):
                for tweet in t.search(cadena, start=i, count=10):
                        print(tweet.text)
                        print("-------")
                        i = tweet.id
Example #16
    def get_tweets(self, search, nb, include_RT, useKey, keys):

        if not useKey:
            keys = None

        twitter = Twitter(
            language=self.dico_lang[self.language],
            license=keys
        )

        tweets = list()
        if not include_RT:
            for tweet in twitter.search(search, start=1, count=nb*3):
                if not tweet.text.startswith('RT'):
                    tweet_input = Input(tweet.text)
                    annotations = {
                        'source': 'Twitter',
                        'author': tweet.author,
                        'date': tweet.date,
                        'url': tweet.url,
                        'search': search,
                    }
                    segment = tweet_input[0]
                    segment.annotations.update(annotations)
                    tweet_input[0] = segment
                    tweets.append(tweet_input)
                if len(tweets) == nb:
                    break
        else:        
            for tweet in twitter.search(search, start=1, count=nb):
                tweet_input = Input(tweet.text)
                annotations = {
                    'source': 'Twitter',
                    'author': tweet.author,
                    'date': tweet.date,
                    'url': tweet.url,
                    'search': search,
                }
                segment = tweet_input[0]
                segment.annotations.update(annotations)
                tweet_input[0] = segment
                tweets.append(tweet_input)
        return tweets
Example #17
File: stream.py Project: Nuevalgo/Feedbot
def search(text):
    list = []

    twitter = Twitter(language='en')
    for tweet in twitter.search(text, count=30, cached=False):
        list.append(tweet.text)

    return list
Example #18
class tweetSentiment(object):


	def __init__(self, topic, tweetCount):
		self.topic = topic
		self.tweetCount = tweetCount
		self.t = Twitter(language='en')
		self.i = None

	def fArray(self):
		'''full array including tweet and sentiment'''	
		fullArray = []

		for tweet in self.t.search(self.topic, start=self.i, count = self.tweetCount):
			fullArray.append([tweet.text,indicoio.sentiment(tweet.text)])
			self.i = tweet.id

		return fullArray

	def sArray(self):
		'''calculate sentiment '''
		sentimentArray = []

		for tweet in self.t.search(self.topic, start=self.i, count = self.tweetCount):
			sentimentArray.append(indicoio.sentiment(tweet.text))
			self.i = tweet.id

		return sentimentArray

	def average(self,numArray):
		'''average sentiment'''
		return sum(numArray)/len(numArray)

	def trending(self):
		'''trending sentiment'''

		trendArray = []

		for trend in Twitter().trends(cached=False):
			trendArray.append([trend,indicoio.sentiment(trend)])

		return trendArray
Example #19
def find(tag):
    """
	Finds content and user ID of posts with specified hashtag and saves to
	.txt file. 
	"""
    twitter = open("twitter_data.txt", "r+")
    t = Twitter(language='en')

    #compiles 1000 tweets with the specified tag and saves content in file
    for tweet in t.search(tag, count=1000):
        twitter.write(str(tweet))
Example #20
    def get_tweets(self, search, nb, include_RT, useKey, keys):

        if not useKey:
            keys = None

        twitter = Twitter(language=self.dico_lang[self.language], license=keys)

        tweets = list()
        if not include_RT:
            for tweet in twitter.search(search, start=1, count=nb * 3):
                if not tweet.text.startswith('RT'):
                    tweet_input = Input(tweet.text)
                    annotations = {
                        'source': 'Twitter',
                        'author': tweet.author,
                        'date': tweet.date,
                        'url': tweet.url,
                        'search': search,
                    }
                    segment = tweet_input[0]
                    segment.annotations.update(annotations)
                    tweet_input[0] = segment
                    tweets.append(tweet_input)
                if len(tweets) == nb:
                    break
        else:
            for tweet in twitter.search(search, start=1, count=nb):
                tweet_input = Input(tweet.text)
                annotations = {
                    'source': 'Twitter',
                    'author': tweet.author,
                    'date': tweet.date,
                    'url': tweet.url,
                    'search': search,
                }
                segment = tweet_input[0]
                segment.annotations.update(annotations)
                tweet_input[0] = segment
                tweets.append(tweet_input)
        return tweets
Example #21
def fuzzy_find(thing):
    t = Twitter()

    fuzzy_things = fuzzy_list(thing)

    tweets = []
    for item in fuzzy_things:
        new_tweets = t.search(item, count=50, throttle=2)
        for tweet in new_tweets:
            ids = map(lambda x: x.id, tweets)
            if ids.count(tweet.id) == 0:
                tweets.append(tweet)

    return tweets
Example #22
File: mine.py Project: nirabhratapaswi/nlp
def get_info(search_query):
	if isinstance(search_query, str):
		search_query = str(search_query)
	else:
		return { "Error": "Pass a string, from mine.py [7]", "Result": [None] }

	result = []
	engineGoogle = Google(license=None, throttle=0.5, language=None)
	engineBing = Bing(license=None, throttle=0.5, language=None)
	engineTwitter = Twitter(license=None, throttle=0.5, language=None)
	engineFacebook = Facebook(license=None, throttle=1.0, language='en')
	engineWikipedia = Wikipedia(license=None, throttle=5.0, language=None)
	engineFlickr = Flickr(license=None, throttle=5.0, language=None)
	engineArray = [engineGoogle, engineBing, engineTwitter, engineFacebook, engineWikipedia, engineFlickr]
	engineArray = [engineGoogle, engineTwitter]

	'''
	for i in range(1,2):
		# result = result + ([repr(plaintext(para.text)) for para in engine[0].search(search_query, type=SEARCH, start=i, count=5)])
		[result.append([result.append(repr(plaintext(para.text))) for para in engine.search(search_query, type=SEARCH, start=i, count=5)]) for engine in engineArray]
			# print repr(plaintext(para.text))
			# print repr(plaintext(para.url)) + '\n\n'
			# result.append(repr(plaintext(para.text)))
	'''

	# Google
	for i in range(1, 5):
		result = result + ([para.text for para in engineGoogle.search(search_query, type=SEARCH, start=i, count=10)])
		
	for i in range(1, 5):
		result = result + ([para.text for para in engineTwitter.search(search_query, type=SEARCH, start=i, count=10)])
	'''
	# for i in range(1,2):
		# result = result + ([repr(plaintext(para.text)) for para in engineBing.search(search_query, type=SEARCH, start=i, count=5)])
	for i in range(1,2):
		result = result + ([repr(plaintext(para.text)) for para in engineTwitter.search(search_query, type=SEARCH, start=i, count=10)])
	# for i in range(1,2):
		# result = result + ([repr(plaintext(para.text)) for para in engineFacebook.search(search_query, type=SEARCH, start=i, count=5)])
	# for i in range(1,2):
		# result = result + ([repr(plaintext(para.text)) for para in engineWikipedia.search(search_query, type=SEARCH, start=i, count=5)])
	# for i in range(1,2):
		# result = result + ([repr(plaintext(para.text)) for para in engineFlickr.search(search_query, type=SEARCH, start=i, count=5)])
	'''

	return { "Error": None, "Result": result }

	# return { "Error": None, "Result": ['Hello World', 'Bye Bye Tommy'] }
Example #23
def get_info(search_query):
    if isinstance(search_query, str):
        search_query = str(search_query)
    else:
        return {"Error": "Pass a string, from mine.py [7]"}

    google = [{'text': '', 'url': '', 'title': ''}]
    twitter = [{'text': '', 'url': '', 'title': ''}]
    engineGoogle = Google(license=None, throttle=0.5, language=None)
    # engineBing = Bing(license=None, throttle=0.5, language=None)
    engineTwitter = Twitter(license=None, throttle=0.5, language=None)
    # engineFacebook = Facebook(license=None, throttle=1.0, language='en')
    # engineWikipedia = Wikipedia(license=None, throttle=5.0, language=None)
    # engineFlickr = Flickr(license=None, throttle=5.0, language=None)
    # engineArray = [engineGoogle, engineBing, engineTwitter, engineFacebook, engineWikipedia, engineFlickr]
    engineArray = [engineGoogle, engineTwitter]

    # Google
    for i in range(1, 2):
        for para in engineGoogle.search(search_query,
                                        type=SEARCH,
                                        start=i,
                                        count=5):
            google.append({
                'text': para.text,
                'url': para.url,
                'title': para.title
            })
        #resultGoogle = resultGoogle + ([para.text for para in engineGoogle.search(search_query, type=SEARCH, start=i, count=10)])
    # Twitter
    for i in range(1, 2):
        for para in engineTwitter.search(search_query,
                                         type=SEARCH,
                                         start=i,
                                         count=5):
            twitter.append({
                'text': para.text,
                'url': para.url,
                'title': para.title
            })
        #resultTwitter = resultTwitter + ([para.text for para in engineTwitter.search(search_query, type=SEARCH, start=i, count=10)])

    # print 'From data_mine.py --> google: ', google, ', twitter: ', twitter

    return {"Error": None, "Google": google, "Twitter": twitter}
Example #24
def main():
    # user input
    parser = argparse.ArgumentParser(description='Downloads tweets for a given search word')
    parser.add_argument('--term', help='Term to search tweets',required=True)
    parser.add_argument('--out', help='Output CSV file name', default='tweets.csv')
    args = parser.parse_args()
    # Twitter engine
    engine = Twitter(language='en')
    term = " ".join(args.term.split("_"))
    mkdir_p(os.path.dirname(args.out))
    with open(args.out, "w") as outfile:
        print("Searching for tweets with '{}'".format(term))
        writer = csv.writer(outfile, delimiter=',', quotechar='\"', quoting=csv.QUOTE_ALL)
        # download tweets
        for tweet in engine.search(term, cached=False, start=1, count=30):
            csvrow = tweet.text.encode('utf-8')
            # write into CSV file
            writer.writerow([csvrow])
Example #25
File: kernel.py Project: willyg302/Parrot
	def search(self, args):
		"""
		Usage:
		  search [-fty] <keyword>
		  search -h | --help

		Options:
		  -h --help      Show this help message.
		  -f --facebook  Search for keyword on Facebook.
		  -t --twitter   Search for keyword on Twitter.
		  -y --youtube   Search for keyword on YouTube.
		"""

		# Example args information:
		# {'--facebook': False,
		# '--help': False,
		# '--twitter': True,
		# '--youtube': False,
		# '<keyword>': 'f'}

		engine = Twitter(language='en')
		ret = []

		'''
		generator = ({
			'text': tweet.text,
			'author': tweet.author,
			'date': tweet.date,
			'hashtags': hashtags(tweet.text)
		} for tweet in engine.search('is cooler than', count=25, cached=False))

		self.db.bulk_insert('test', generator)
		'''
		
		for tweet in engine.search('is cooler than', count=25, cached=False):
			ret.append({
				'text': tweet.text,
				'author': tweet.author,
				'date': tweet.date,
				'hashtags': hashtags(tweet.text)
			})
		

		return str(ret)
Example #26
def Generate_Tweets(searchterm, filename_label):
	twitter_obj=Twitter(license=None, throttle=0.5,language='en')
	#throttle: time between requests.
	#now the twitter_obj can be searched, with the following parameters.
	
	# Twitter returns up to 1500 results for a search term and has an hourly limit of 150 queries.
	# Each call to search() is one query, so you can make e.g. 15 queries of 100 results each
	# or 150 queries of 10 results each.
	# Parameters for Twitter:
	#   start: 1 to 1500/count
	#   count: results per page, 1-100
	#   sort: RELEVANCY; limit: 150 queries/hour; throttle=0.5
	f=open(filename_label,'a')

	for tweet in twitter_obj.search(searchterm,cached=False,language='en', sort ='RELEVANCY',count=100):
		unicode_tweet=plaintext(tweet.description)
		#Tweets are unicode, need to be converted to ascii before storing in file
		ascii_tweet=unicode_tweet.encode('ascii','ignore')
		f.write(ascii_tweet+'\n')
	
	f.close()
Example #27
def poli_twitter_analysis():
	"""This function parses Twitter to determine the average sentiment towards political figures during an event"""
	
	candidates = ['trump','walker', 'fiorina', 'carson', 'cruz', 'rubio', 'huckabee', 'paul', 'kasich','christie', 'bush','clinton','sanders',"o'malley"] #list of searches to use

	twtNum = 50 #number of tweets to search for each time
	
	t = Twitter() 
	i = None
	twtstext = []
	twtsdate = []
	twtsauthor = []
	twtscandi = []
	twtssenti = []

	for item in candidates:
		for j in range(1):
			for tweet in t.search(item, start=i, count=twtNum):
				twtscandi.append(item)
				twtstext.append(tweet.text)
				m = tweet.text
				twtsdate.append(tweet.date)
				twtsauthor.append(tweet.author)
				[senti,objec] = sentiment(m)
				twtssenti.append(senti)

	zipped1 = zip(twtscandi, twtssenti)
	zipped2 = zip(twtscandi, twtsdate, twtsauthor, twtstext, twtssenti)
	
	timestr = time.strftime("%Y%m%d%H%M%S")

	filename = timestr + '.txt'
	f = open(filename, 'w')
	f.write(' '.join(map(str, zipped1)))
	f.close()

	filename = 'tweets_' + timestr + '.txt'
	f = open(filename, 'w')
	f.write(' '.join(map(str, zipped2)))
	f.close()

	print 'Complete'
Example #28
from pattern.web import Twitter

t = Twitter()
i = None
for j in range(3):
    for tweet in t.search("signing day", start=i, count=30):
        print tweet.id
        print tweet.name
        print tweet.text
        print
Example #29
    # We only want to add the latest tweets, i.e., those we haven't previously encountered.
    # With an index on the first column we can quickly check if an ID already exists.
    # The index becomes important once more and more rows are added to the table (speed).
    table = Datasheet.load("current_tweets.csv")
    index = dict.fromkeys(table.columns[0], True)
except:
    table = Datasheet()
    index = {}

engine = Twitter(language="en")
tweet_csv = []
table = []
for twitter_subject in twitter_subjects:
    # With cached=False, a live request is sent to Twitter,
    # so we get the latest results for the query instead of those in the local cache.
    for tweet in engine.search(twitter_subject, count=275, cached=False):
        # Create a unique ID based on the tweet content and author.
        new_line = '@' + tweet.author + ' , ' + tweet.description + ' , ' + str(
            tweet.values()[5]) + ' , ' + str(tweet.url)
        id = hash(tweet.author + tweet.description)
        # Only add the tweet to the table if it doesn't already contain this ID.
        if len(table) == 0 or id not in index:
            tweet_csv.append(new_line)
            norm_descr = unicodedata.normalize('NFKD',
                                               tweet.description).encode(
                                                   'ascii', 'ignore')
            norm_author = unicodedata.normalize('NFKD', tweet.author).encode(
                'ascii', 'ignore')
            table = table + ['@' + str(norm_author) + ' ' + str(norm_descr)]
            index[id] = True
Example #30
#!/usr/bin/python

from pattern.web import Twitter, plaintext


twitter_api = Twitter(language='en')

tweets = twitter_api.search("@", count=2)
for tweet in tweets:
    text = tweet.text
    print text
Example #31
from pattern.web import Twitter, plaintext

twitter = Twitter(language='en')
for tweet in twitter.search('"more important than"', cached=False):
    print plaintext(tweet.text)
Example #32
from pattern.web import Twitter

t = Twitter()
i = None
for j in range(3):
    for tweet in t.search('college', start=i, count=30):
        print tweet.id
        print tweet.name
        print tweet.text
        print tweet.latitude
        print
Example #33
    # We store tweets in a Table that can be saved as a text file.
    # In the first column, we'll store a unique ID for each tweet.
    # We only want to add the latest tweets, i.e. those we haven't previously encountered.
    # With an index() on the first column we can quickly check if an ID already exists.
    # The index becomes important once more and more rows are added to the table (speed).
    table = Table.load("cool.txt")
    index = table.index(table.columns[0])
except:
    table = Table()
    index = {}

engine = Twitter()

# With cached=False, a live request is sent to Twitter,
# so we get the latest results for the query instead of those in the local cache.
for tweet in engine.search("is cooler than", count=25, cached=False):
    print tweet.description
    print tweet.author
    print tweet.date
    print hashtags(tweet.description)  # Keywords in tweets start with a #.
    print
    # Create a unique ID based on the tweet content and author.
    id = hash(tweet.author + tweet.description)
    # Only add the tweet to the table if it doesn't already contain this ID.
    if len(table) == 0 or id not in index:
        table.append([id, tweet.description])
        index[id] = True

table.save("cool.txt")

print "Total results:", len(table)
Example #34
from pattern.web import Twitter

t = Twitter()
i = None
for j in range(3):
    for tweet in t.search('Snowmageddon', start=i, count=1000):
        print tweet.id
        print tweet.name
        print tweet.text
        print
Example #35
class Ui_Dialog(object):
    def setupUi(self, Dialog):
        Dialog.setObjectName("Dialog")
        Dialog.resize(823, 677)
        self.label = QtGui.QLabel(Dialog)
        self.label.setGeometry(QtCore.QRect(10, 10, 800, 400))
        self.label.setFrameShape(QtGui.QFrame.WinPanel)
        self.label.setText("")
        self.label.setObjectName("label")
        self.listWidget = QtGui.QListWidget(Dialog)
        self.listWidget.setGeometry(QtCore.QRect(10, 470, 801, 192))
        self.listWidget.setObjectName("listWidget")
        self.widget = QtGui.QWidget(Dialog)
        self.widget.setGeometry(QtCore.QRect(10, 429, 801, 25))
        self.widget.setObjectName("widget")
        self.horizontalLayout = QtGui.QHBoxLayout(self.widget)
        self.horizontalLayout.setContentsMargins(0, 0, 0, 0)
        self.horizontalLayout.setObjectName("horizontalLayout")
        self.label_2 = QtGui.QLabel(self.widget)
        self.label_2.setObjectName("label_2")
        self.horizontalLayout.addWidget(self.label_2)
        self.lineEdit = QtGui.QLineEdit(self.widget)
        self.lineEdit.setObjectName("lineEdit")
        self.horizontalLayout.addWidget(self.lineEdit)
        self.pushButton = QtGui.QPushButton(self.widget)
        self.pushButton.setObjectName("pushButton")
        self.horizontalLayout.addWidget(self.pushButton)

        self.retranslateUi(Dialog)
        QtCore.QMetaObject.connectSlotsByName(Dialog)
        #
        self.pushButton.clicked.connect(self.on_buttom_pressed)
        self.listWidget.doubleClicked.connect(self.goTweet)

        #
        self.alText = u''
        self.fullText = u''
        self.twitter = Twitter(language='tr')
        self.prevId = None
        self.timer = QtCore.QTimer(Dialog)
        self.timer.timeout.connect(self.on_timer)
        self.dialog = Dialog
        self.twIds = []

    def retranslateUi(self, Dialog):
        Dialog.setWindowTitle(
            QtGui.QApplication.translate("Dialog", "Twitter Gözetleyici", None,
                                         QtGui.QApplication.UnicodeUTF8))
        self.label_2.setText(
            QtGui.QApplication.translate("Dialog", "Anahtar Kelime :", None,
                                         QtGui.QApplication.UnicodeUTF8))
        self.pushButton.setText(
            QtGui.QApplication.translate("Dialog", "Gözetle", None,
                                         QtGui.QApplication.UnicodeUTF8))

    #
    def on_buttom_pressed(self):
        if self.timer.isActive():
            self.timer.stop()
            self.pushButton.setText(u'Gözetle')
        else:
            self.listWidget.clear()
            self.twIds = []
            self.fullText = u''
            self.on_timer()
            self.timer.start(60000)
            self.pushButton.setText('Durdur !')

        return

    def on_timer(self):
        searchKey = self.lineEdit.text()
        self.getTwits(searchKey)
        self.filterWords()
        self.fullText = self.fullText + self.alText
        self.showWordCloud()

    def showWordCloud(self):
        wordcloud = WordCloud(width=800, height=400).generate(self.fullText)
        img = np.array(wordcloud.to_image())
        height, width, byteValue = img.shape
        byteValue = byteValue * width
        image = QtGui.QImage(img.data, width, height, byteValue,
                             QtGui.QImage.Format_RGB888)
        pxmp = QtGui.QPixmap(image)
        self.label.setPixmap(pxmp)

    def filterWords(self):
        # frequently occurring words are filtered out; the list is incomplete, of course...
        flt = [
            u'https', u'nin', u'bir', u'daha', u'diye', u'için', u'gibi',
            u'işte', u'ile', u'değil', u'ben', u'sen', u'çok', u'ama', u'Sen',
            u'den', u'htt'
        ]
        derle = re.compile("\w*", re.UNICODE)
        wL = re.findall(derle, self.alText)
        temp = []
        for w in wL:
            if len(w) < 3:
                continue
            elif w in flt:
                continue
            else:
                #print w
                temp.append(w)
        self.alText = ' '.join(temp)

    def getTwits(self, keyWord):
        if len(keyWord) == 0:
            keyWord = u'"gündem"'
            self.lineEdit.setText(keyWord)
        self.alText = u''
        try:
            tList = self.twitter.search(keyWord,
                                        start=self.prevId,
                                        count=10,
                                        cached=False)

        except:
            # If the search fails (e.g., rate limit hit), warn the user and leave the list empty.
            tList = []
            message = "Twitter Aram Limiti Lütfen Biraz Bekleyin"
            QtGui.QMessageBox.information(self.dialog, "Information", message)

        for tweet in tList:
            self.listWidget.addItem(
                QtGui.QListWidgetItem(cleanTweet(tweet.text)))
            self.twIds.append(tweet.id)
            self.listWidget.setCurrentRow(self.listWidget.count() - 1)
            tweet.text = self.filterRT(tweet.text)
            tweet.text = self.filterLink(tweet.text)
            self.alText = self.alText + plaintext(tweet.text) + u' '
            self.prevId = tweet.id

    def filterRT(self, tweet):
        # the RT prefix is filtered out
        buf = tweet[:2]
        if buf == u'RT':
            ix = tweet.find(':')
            tweet = tweet[ix:]
        return tweet

    def filterLink(self, tweet):
        regex = r'https?://[^\s<>"]+|www\.[^\s<>"]+'
        match = re.search(regex, tweet)
        buf = tweet
        if match:
            ixs = tweet.find(match.group())
            ixe = len(match.group())
            try:
                buf = tweet[:ixs]
            except:
                print "not removed"
        return buf

    def goTweet(self):
        i = self.listWidget.currentRow()
        urlTw = 'https:/' + '/twitter.com/statuses/' + str(self.twIds[i])
        webbrowser.open(urlTw)
Example #36
File: 04-KNN.py Project: ADA110/Cibus
# to learn how to label unlabeled documents.

# This example trains a simple classifier with Twitter messages.
# The idea is that, if you have a number of texts with a "type"
# (mail/spam, positive/negative, language, author's age, ...),
# you can predict the type of other "unknown" texts.
# The k-Nearest Neighbor algorithm classifies texts according
# to the k documents that are most similar (cosine similarity) to the given input document.

m = Model()
t = Twitter()

# First, we mine a model of a 1000 tweets.
# We'll use hashtags as type.
for page in range(1, 10):
    for tweet in t.search('#win OR #fail', start=page, count=100, cached=True):
        # If the tweet contains #win hashtag, we'll set its type to 'WIN':
        s = tweet.text.lower()  # tweet in lowercase
        p = '#win' in s and 'WIN' or 'FAIL'  # document labels
        s = Sentence(parse(s))  # parse tree with part-of-speech tags
        s = search('JJ', s)  # adjectives in the tweet
        s = [match[0].string for match in s]  # adjectives as a list of strings
        s = " ".join(s)  # adjectives as string
        if len(s) > 0:
            m.append(Document(s, type=p, stemmer=None))

# Train k-Nearest Neighbor on the model.
# Note that this is only a simple example: to build a robust classifier
# you would need a lot more training data (e.g., tens of thousands of tweets).
# The more training data, the more statistically reliable the classifier becomes.
# The only way to really know if your classifier is working correctly
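The comment above is cut off and the excerpt stops before the actual training step. As a hedged sketch (not part of the original file), the mined adjectives could be fed to a k-NN classifier and spot-checked on a held-out slice, reusing the same calls that appear in the later KNN examples (#49, #50); the 50-tweet holdout split and the dict-based classify() call are assumptions.

# Hedged sketch: train a k-NN on adjective counts and do a rough accuracy check
# on a held-out slice. The holdout size (50) is arbitrary.
from pattern.web import Twitter
from pattern.en import tag
from pattern.vector import KNN, count

twitter, knn = Twitter(), KNN()
samples = []  # (adjective counts, label) pairs
for page in range(1, 10):
    for tweet in twitter.search('#win OR #fail', start=page, count=100, cached=True):
        s = tweet.text.lower()
        label = '#win' in s and 'WIN' or 'FAIL'
        v = count([word for word, pos in tag(s) if pos == 'JJ'])  # adjectives only
        if v:
            samples.append((v, label))

holdout, training = samples[:50], samples[50:]
for v, label in training:
    knn.train(v, type=label)

hits = sum(1 for v, label in holdout if knn.classify(v) == label)
print("rough accuracy: %.2f" % (float(hits) / max(len(holdout), 1)))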
Example #37
from pattern.db  import Datasheet, pprint

engine1 = Twitter(language="en")
engine2 = Twitter(language="en")


print "-------------------------------------"
print "Tweets in Boston, MA ... "


bosCount=0
sfoCount=0 

keyword = " "

for tweet in engine1.search(keyword , geocode="42.3583333,-71.0602778,25mi" , count=400, cached=True):
    print "-> BOSTON "
    print tweet.author
    print tweet.text
    print tweet.date
    bosCount += 1



print "-------------------------------------"
print "Tweets in San Francisco, CA ... "

for tweet in engine2.search(keyword, geocode="37.781157,-122.398720,25mi", count=400, cached=True):
    print "-> SAN FRANCISCO "
    print tweet.author
    print tweet.text
Example #38
    table = Datasheet.load(pd("eulogy.csv"))
    index = set(table.columns[0])
except:
    table = Datasheet()
    index = set()

engine = Twitter(language="en")

# With Twitter.search(cached=False), a "live" request is sent to Twitter:
# we get the most recent results instead of those in the local cache.
# Keeping a local cache can also be useful (e.g., while testing)
# because a query is instant when it is executed the second time.
prev = None
for i in range(2):
    print(i)
    for tweet in engine.search("eulogy", start=prev, count=25, cached=False):
        print("")
        print(tweet.text)
        print(tweet.author)
        print(tweet.date)
        print(hashtags(tweet.text))  # Keywords in tweets start with a "#".
        print("")
        # Only add the tweet to the table if it doesn't already exist.
        if len(table) == 0 or tweet.id not in index:
            table.append([tweet.id, tweet.text])
            index.add(tweet.id)
        # Continue mining older tweets in next iteration.
        prev = tweet.id

# Create a .csv in pattern/examples/01-web/
table.save(pd("eulogy_july_21.csv"))
Example #39
from pattern.web import Twitter
from textblob import TextBlob

t = Twitter()
i = None
for j in range(3):
    for tweet in t.search("college", start=i, count=30):
        print tweet.id
        print tweet.name
        print tweet.text

        # Pull nouns from tweet
        blob = TextBlob(tweet.text)
        print blob.noun_phrases

        print
Example #40
enough = False
last = None
i = 0
sentences = []
numwords = 0

firsttime = True

while not enough:
	try:
	
		if not firsttime:
			sleep(15)
		firsttime = False
		
		for tweet in twitter.search('#nanowrimo', start=last, count=100):
			i = i + 1
			if tweet.id == STOP_AT:
				print "Reached STOP_AT tweet"
				enough = True
				break
				
			#print i, plaintext(tweet.text)
			last = tweet.id
	
			# skip any tweet with funny characters
			m = re.search(r"[^\w\d\s\'\"\,\.\?\(\)\!\#\@\:]", tweet.text)
			if m:
				#print i, m.group(0), tweet.text
				continue
	
Example #41
from pattern.web import Twitter
from textblob import TextBlob

t = Twitter()
i = None
for j in range(3):
    for tweet in t.search('college', start=i, count=30):
        print tweet.id
        print tweet.name
        print tweet.text

        blob = TextBlob(tweet.text)
        # Pull nouns from tweet
        print blob.noun_phrases
        # tweet's sentiment analysis
        for sentence in blob.sentences:
            print(sentence.sentiment.polarity)

        print
Example #42
#%%%Data Mining
from pattern.web import Google
google = Google()
for results in google.search('Analytics India Magazine'):
    print(results.url)
    print(results.text)

for results in google.search('Gamification'):
    print(results.url)

#twitter
from pattern.web import Twitter

twitter = Twitter()

for results in twitter.search('Analytics India Magazine'):
    print(results.url)
    print(results.text)

for results in twitter.search('Gamification'):
    print(results.url)

#flickr
from pattern.web import Flickr
flickr = Flickr(license=None)
for result in flickr.search('Analytics India Magazine'):
    print(result.url)
    print(result.text)

#%%%Accessing Web Pages
# The URL object is used to retrieve contents from web pages. It has several methods that can be used to open a web page, download its contents, and read it.
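No code follows the comment above in this excerpt, so here is a hedged illustration of the URL methods it mentions (open, download, read); the example.com address is only a placeholder.

# Hedged sketch of pattern.web's URL object, as described in the comment above.
from pattern.web import URL, plaintext

url = URL('https://www.example.com/')
html = url.download(cached=False)  # fetch the page source as a string
print(plaintext(html))             # strip HTML tags to get readable text

# open() + read() is the lower-level route to the same contents:
raw = url.open().read()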
Example #43
def main():

    # The first two vars hold the number of relevant sentences, the other two hold the accumulated float values
    police_killer_i = 0
    police_killed_i = 0
    police_killer_value = 0.0
    police_killed_value = 0.0
    total_sentences = 0

    # Init Twitter query engine
    engine = Twitter(license=None, language='en')
    results_list = []
    print('Performing twitter queries...')

    # 4 different queries with 100 results each = 400 results
    results_list.append(
        engine.search('policeman kill', start=1, count=100, cached=False))
    results_list.append(
        engine.search('policeman killed', start=1, count=100, cached=False))
    results_list.append(
        engine.search('police kill', start=1, count=100, cached=False))
    results_list.append(
        engine.search('police killed', start=1, count=100, cached=False))

    #print lemma('shot')

    # Open a file to put some recognized examples
    examples_file = open('examples.txt', 'w')

    # For each list of results
    for ii in xrange(len(results_list)):
        print('Starting to analyze query results: ' + str(ii + 1) +
              ' out of ' + str(len(results_list)))
        for res in results_list[ii]:
            # Parse and split the tweet in sentences
            s = parse(string.lower(res.description),
                      chunks=True,
                      relations=True,
                      lemmata=True)
            #s = parse(string.lower(res), chunks=True, relations=True, lemmata=True)
            #pprint(s)

            ss = split(s)

            # Then for each sentence
            for sent in ss:
                # Update sentences number
                total_sentences += 1

                found = False
                i = 0
                value = 0.0

                # First check the reliability of the sentence - .5 if a bad word is found, 1.0 otherwise
                while (not found and (i < len(sent.words))):
                    #print sent.words[i]
                    if (sent.words[i].string in PROFANITY):
                        found = True
                    i = i + 1
                if (found):
                    #print('Found a bad word')
                    value = 0.5
                else:
                    # No bad words found -> give max reliability value
                    value = 1.0

                #print sent.chunks
                # Here, I want to clear the sentence from the PNP elements. I will filter out the words belonging to PNP from the list
                cleared_sentence_words = filter(lambda (i): i.pnp is None,
                                                sent.words)
                cleared_string = ''

                # But now, it seems the only way to reconstruct a parsed sentence is to assemble a string again and re-parse it
                for word in cleared_sentence_words:
                    cleared_string += ' ' + word.string
                #print cleared_string
                cleared_sentence = parse(cleared_string,
                                         chunks=True,
                                         relations=True,
                                         lemmata=True)
                cleared_sentence = split(cleared_sentence)
                #pprint(cleared_sentence)
                sentence_type1 = False

                # Now cleared sentence is a sentence without PNP
                # Check if it is a standard active sentence
                for match in search('NP kill NP', cleared_sentence):
                    # It is
                    sentence_type1 = True
                    # Check if the Subject is the police
                    if (match.constituents()[0].role == 'SBJ'):
                        for word in match.constituents()[0].words:
                            if word.string in search_list:
                                police_killer_i += 1
                                police_killer_value += value
                                #print('Police killed')
                                # Print to the examples' file the recognized match
                                for sword in match.words:
                                    examples_file.write(
                                        str(sword.string.encode("utf-8")) +
                                        ' ')
                                examples_file.write('\r\n')
                                #examples_file.write(str(match.words)+'\r\n');
                                examples_file.write(
                                    '   Recognized as: police killed somebody'
                                    + '\r\n')
                                examples_file.write(
                                    '   TYPE: ACTIVE - SUBJECT' + '\r\n')
                                examples_file.write('\r\n')

                    if (len(match.constituents()) > 2):
                        # Or check if it is object
                        if (match.constituents()[2].role == 'OBJ'):
                            for word in match.constituents()[2].words:
                                if word.string in search_list:
                                    police_killed_i += 1
                                    police_killed_value += value
                                    #print('Killed by police')
                                    # Print to the example file the recognized match
                                    for sword in match.words:
                                        examples_file.write(
                                            str(sword.string.encode("utf-8")) +
                                            ' ')
                                    examples_file.write('\r\n')
                                    examples_file.write(
                                        '   Recognized as: police killed by somebody'
                                        + '\r\n')
                                    examples_file.write(
                                        '   TYPE: ACTIVE - OBJECT' + '\r\n')
                                    examples_file.write('\r\n')

                # If it was not an active sentence, check if it is a passive one
                if (not sentence_type1):
                    #print('Try type 2')
                    for match in search('NP kill (PP)+ (NP)+',
                                        cleared_sentence):
                        # Here, the problem is that match.constituents returns a mixed list which can contain both Chunks and Words
                        # We are interested in tags, hence in Chunks, hence we need to do some isinstance non-safe tricks
                        # Checking the subject
                        if (isinstance(match.constituents()[0], Chunk)):
                            if (match.constituents()[0].role == 'SBJ'):
                                #print('Is subject')
                                for word in match.constituents()[0]:
                                    #for word in match.chunks()[0]:
                                    if word.string in search_list:
                                        police_killer_i += 1
                                        police_killer_value += value
                                        # Print to the example file the recognized match
                                        for sword in match.words:
                                            examples_file.write(
                                                str(
                                                    sword.string.encode(
                                                        "utf-8")) + ' ')
                                        examples_file.write('\r\n')
                                        examples_file.write(
                                            '   Recognized as: police killed somebody'
                                            + '\r\n')
                                        examples_file.write(
                                            '   TYPE: PASSIVE - SUBJECT - CHUNK'
                                            + '\r\n')
                                        examples_file.write('\r\n')

                        elif (isinstance(match.constituents()[0], Word)):
                            if match.constituents()[0].string in search_list:
                                police_killer_i += 1
                                police_killer_value += value
                                #print('Killed by police')
                                # Print to the example file the recognized match
                                for sword in match.words:
                                    examples_file.write(
                                        str(sword.string.encode("utf-8")) +
                                        ' ')
                                examples_file.write('\r\n')
                                examples_file.write(
                                    '   Recognized as: police killed somebody'
                                    + '\r\n')
                                examples_file.write(
                                    '   TYPE: PASSIVE - SUBJECT - WORD' +
                                    '\r\n')
                                examples_file.write('\r\n')

                        # Checking the object. First I have to filter out the Word objects from the match results to see if I have enough Chunks
                        if (len(
                                filter(lambda (i): isinstance(i, Chunk),
                                       match.constituents())) == 4):
                            if (match.constituents()[3].role == 'OBJ'):
                                for word in match.constituents()[3]:
                                    if word.string in search_list:
                                        police_killed_i += 1
                                        police_killed_value += value
                                        # Print to the example file the recognized match
                                        for sword in match.words:
                                            examples_file.write(
                                                str(
                                                    sword.string.encode(
                                                        "utf-8")) + ' ')
                                        examples_file.write('\r\n')
                                        examples_file.write(
                                            '   Recognized as: police was killed by somebody'
                                            + '\r\n')
                                        examples_file.write(
                                            '   TYPE: PASSIVE - OBJECT - CHUNK'
                                            + '\r\n')
                                        examples_file.write('\r\n')
Example #44
from pattern.web import Twitter
from pattern.en import sentiment

sent = []

t = Twitter()
i = None
cc = 0
for tweet in t.search('google', count=100, lang='en'):

    print(str(cc) + ':  ' + tweet.text + '\n')
    sent.append([sentiment(tweet.text)])
    cc = cc + 1
Example #45
from pattern.web import Google

google = Google(license=None)
for search_result in google.search('artificial intelligence'):
    print(search_result.url)
    print(search_result.text)

# #### Twitter

from pattern.web import Twitter

twitter = Twitter()
index = None
for j in range(3):
    for tweet in twitter.search('artificial intelligence',
                                start=index,
                                count=3):
        print(tweet.text)
        index = tweet.id

# ### Converting HTML Data to Plain Text

from pattern.web import URL, plaintext

html_content = URL(
    'https://stackabuse.com/python-for-nlp-introduction-to-the-textblob-library/'
).download()
cleaned_page = plaintext(html_content.decode('utf-8'))
print(cleaned_page)

# ### Parsing PDF Documents
Example #46
from pattern.web import Twitter

t = Twitter()
i = None
for j in range(3):
    for tweet in t.search('SunTrust', start=i, count=30):
        print tweet.id
        print tweet.name
        print tweet.text
        print
Example #47
# We can use it to check if we have already seen a tweet,
# so we don't store it twice.

# Search for tweets containing the given search query:
q = "charlie hebdo"

twitter = Twitter(language="en", license=None)

id = None
for i in range(3):
    # Look for tweets containing the search query.
    # We can get a maximum of 100 tweets per search.
    # Don't cache the results locally,
    # so we get the latest new tweets when the script runs.
    # Do this 3x.
    for tweet in twitter.search(q, start=id, count=100, cached=False):
        id = tweet.id
        if id not in seen:
            print tweet.text
            print
            seen.add(id)
            csv.append([
                tweet.id, q, tweet.author, tweet.text, tweet.retweets,
                tweet.date
            ])
    # Update the CSV.
    csv.save(PATH)

# Each time you run the script, new tweets will be appended to the CSV.
# For example, we have Twitter miners that automatically run 10x each day,
# and have been running for many days and weeks.
Example #48
    def getTweetSecureLoad(self, topic):
        # This example retrieves tweets containing given keywords from Twitter.

        self.search_topic = topic
        self.search_topic = topic + ' film'
        '''
        print 'CLASS (Twitter_PatternPKG) - Twitter Secure Initial Load - Topic: ' + self.search_topic
        try: 
            # We'll store tweets in a Datasheet.
            # A Datasheet is a table of rows and columns that can be exported as a CSV-file.
            # In the first column, we'll store a unique id for each tweet.
            # We only want to add the latest tweets, i.e., those we haven't seen yet.
            # With an index on the first column we can quickly check if an id already exists.
            # The pd() function returns the parent directory of this script + any given path.

            table = Datasheet.load(pd(self.FILE_STORAGE))
            # index = set(table.columns[0])
            index = set(table.columns[4])   # on the text
            
        except:
            table = Datasheet()
            index = set()
        '''

        table = Datasheet()
        index = set()

        engine = Twitter(language="en")

        # With Twitter.search(cached=False), a "live" request is sent to Twitter:
        # we get the most recent results instead of those in the local cache.
        # Keeping a local cache can also be useful (e.g., while testing)
        # because a query is instant when it is executed the second time.
        prev = None

        #searchThisSubjects = search_topic

        # put headers
        table.append(["tweet_id", "tweet_date", "InputSubject", "Tweet_text"])

        #for oneSubject in searchThisSubjects:
        oneSubject = self.search_topic
        # oneSubject

        tweet_list_Json = []  # list of JSons
        tweet_list = []
        try:
            for i in range(1):
                for tweet in engine.search(oneSubject,
                                           start=prev,
                                           count=8,
                                           cached=False):
                    if 'http' in tweet.text:
                        posi = tweet.text.index('http')
                        tweet.text = tweet.text[0:posi - 1]

                    # Only add the tweet to the table if it doesn't already exist.
                    if len(table) == 0 or tweet.text not in index:
                        table.append(
                            [tweet.id, tweet.date, oneSubject, tweet.text])
                        index.add(tweet.text)

                        tweet_list.append(
                            [tweet.id, tweet.date, oneSubject, tweet.text])
                        #tweetJson = self.formatData2Json(tweet.id, tweet.date, oneSubject, tweet.text)
                        #tweetJson = self.formatData2Json(tweet.id, tweet.date, oneSubject.replace(' film', ''), tweet.text)
                        tweet.text = filter(lambda x: x in string.printable,
                                            tweet.text)  # remove weird stuff
                        tweet.text = tweet.text.replace(
                            '"', '')  # remove weird stuff
                        tweet.text = tweet.text.replace(
                            '\n', '')  # remove weird stuff
                        tweetJson = self.formatData2Json(
                            tweet.id, tweet.date,
                            oneSubject.replace(' film', ''),
                            tweet.text)  # remove artificiall film

                        tweet_list_Json.append(tweetJson)
                        #print tweetJson

                        # BUILD A JSON
                        #http://stackoverflow.com/questions/14547916/how-can-i-loop-over-entries-in-json
                        #BUILD A LIST OF DICTIONARIES
                        #http://stackoverflow.com/questions/2733813/iterating-through-a-json-object

                    # Continue mining older tweets in next iteration.
                    prev = tweet.text

        except Exception:
            ''' 
            print 'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX - ERROR!! - ([twitter_patternPkg_connector] getTweetSecureLoad)'
            '''
            print 'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX - ERROR!!   (film: ' + oneSubject + ')'
            pass

        # Create a .csv in pattern/examples/01-web/
        # table.save(pd("OD_CK1_Source4_Tweeter_InitialLoad.csv"))
        print "CLASS (Twitter_PatternPKG) - Total Secure Twitter Load: " + str(
            len(table)) + '\n'
        #print json.dumps(tweet_list)

        # return tweet_list
        return tweet_list_Json
Example #49
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Apr  3 01:54:18 2019

@author: abhijithneilabraham
"""

from pattern.web import Twitter
from pattern.en import tag
from pattern.vector import KNN, count

twitter, knn = Twitter(), KNN()

for i in range(1, 3):
    for tweet in twitter.search('#github OR #kaggle', start=i, count=100):
        s = tweet.text.lower()
        p = '#github' in s and 'WIN' or 'FAIL'
        v = tag(s)
        v = [word for word, pos in v if pos == 'JJ'] # JJ = adjective
        v = count(v) # {'sweet': 1}
        if v:
            knn.train(v, type=p)

print(knn.classify('sweet potato burger'))
print(knn.classify('stupid autocorrect'))
Example #50
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Mar 25 19:37:34 2019

@author: alternatif
"""

from pattern.web import Twitter
from pattern.en import tag
from pattern.vector import KNN, count

twitter, knn = Twitter(), KNN()

for i in range(1, 3):
    for tweet in twitter.search('#win OR #fail', start=i, count=100):
        s = tweet.text.lower()
        p = 'WIN' if '#win' in s else 'FAIL'
        v = tag(s)
        v = [word for word, pos in v if pos == 'JJ']  # JJ = adjective
        v = count(v)  # {'sweet': 1}
        if v:
            knn.train(v, type=p)

print(knn.classify('sweet potato burger'))
print(knn.classify('stupid autocorrect'))
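# A small hand-rolled check (not part of the original snippet): classify a few manually
# labelled phrases and count how many come back with the expected label.
tests = [('what a beautiful day', 'WIN'),
         ('horrible slow service', 'FAIL')]
hits = sum(1 for text, expected in tests if knn.classify(text) == expected)
print('%d/%d test phrases labelled as expected' % (hits, len(tests)))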
Example #51
0
from pattern.web import Twitter

t = Twitter()
i = None
for j in range(3):
    for tweet in t.search('baseball', start=i, count=30):
        print tweet.id
        print tweet.name
        print tweet.text
        print
        i = tweet.id  # remember the last id so the next pass fetches older tweets
Example #52
0
File: 03-twitter.py Project: sp00/pattern
    # We store tweets in a Datasheet that can be saved as a text file (comma-separated).
    # In the first column, we'll store a unique ID for each tweet.
    # We only want to add the latest tweets, i.e., those we haven't previously encountered.
    # With an index on the first column we can quickly check if an ID already exists.
    # The index becomes important once more and more rows are added to the table (speed).
    table = Datasheet.load("cool.txt")
    index = dict.fromkeys(table.columns[0], True)
except:
    table = Datasheet()
    index = {}

engine = Twitter(language="en")

# With cached=False, a live request is sent to Twitter,
# so we get the latest results for the query instead of those in the local cache.
for tweet in engine.search("is cooler than", count=25, cached=False):
    print tweet.description
    print tweet.author
    print tweet.date
    print hashtags(tweet.description)  # Keywords in tweets start with a #.
    print
    # Create a unique ID based on the tweet content and author.
    id = hash(tweet.author + tweet.description)
    # Only add the tweet to the table if it doesn't already contain this ID.
    if len(table) == 0 or id not in index:
        table.append([id, tweet.description])
        index[id] = True

table.save("cool.txt")

print "Total results:", len(table)
Example #53
0
    def getTweetSecureLoad(self, topic):
        # This example retrieves tweets containing given keywords from Twitter.

        self.search_topic = topic
        print 'CLASS (Twitter_PatternPKG) - Twitter Secure Initial Load - Topic: ' + self.search_topic
        self.search_topic = topic + ' film'
        try: 
            # We'll store tweets in a Datasheet.
            # A Datasheet is a table of rows and columns that can be exported as a CSV-file.
            # In the first column, we'll store a unique id for each tweet.
            # We only want to add the latest tweets, i.e., those we haven't seen yet.
            # With an index on the first column we can quickly check if an id already exists.
            # The pd() function returns the parent directory of this script + any given path.

            table = Datasheet.load(pd(self.FILE_STORAGE))
            # index = set(table.columns[0])
            index = set(table.columns[4])   # on the text
            
        except:
            table = Datasheet()
            index = set()

        engine = Twitter(language="en")

        # With Twitter.search(cached=False), a "live" request is sent to Twitter:
        # we get the most recent results instead of those in the local cache.
        # Keeping a local cache can also be useful (e.g., while testing)
        # because a query is instant when it is executed the second time.
        prev = None

        #searchThisSubjects = search_topic

        # write the header row
        table.append(["tweet_id", "tweet_date", "InputSubject", "Tweet_text"])

        #for oneSubject in searchThisSubjects:
        oneSubject = self.search_topic
        # oneSubject

        tweet_list_Json = []  # list of JSons
        tweet_list = []
        try:
            for i in range(1):
                for tweet in engine.search(oneSubject, start=prev, count=8, cached=False):
                    if 'http' in tweet.text:
                        posi = tweet.text.index('http')
                        tweet.text = tweet.text[0:posi-1]
                                
                    # Only add the tweet to the table if it doesn't already exist.
                    if len(table) == 0 or tweet.text not in index:
                        table.append([tweet.id, tweet.date, oneSubject, tweet.text])
                        index.add(tweet.text)
                        
                        tweet_list.append([tweet.id, tweet.date, oneSubject, tweet.text])
                        #tweetJson = self.formatData2Json(tweet.id, tweet.date, oneSubject, tweet.text)
                        #tweetJson = self.formatData2Json(tweet.id, tweet.date, oneSubject.replace(' film', ''), tweet.text)
                        tweet.text = filter(lambda x: x in string.printable, tweet.text) # strip non-printable characters
                        tweet.text = tweet.text.replace('"', '') # strip stray double quotes
                        tweet.text = tweet.text.replace('\n', '') # strip newlines
                        tweetJson = self.formatData2Json(tweet.id, tweet.date, oneSubject.replace(' film', ''), tweet.text) # drop the artificial ' film' suffix
                        
                        tweet_list_Json.append(tweetJson)
                        #print tweetJson  
                        
                        # BUILD A JSON
                        #http://stackoverflow.com/questions/14547916/how-can-i-loop-over-entries-in-json
                        #BUILD A LIST OF DICTIONARIES                    
                        #http://stackoverflow.com/questions/2733813/iterating-through-a-json-object
                        
                        
                    # Continue mining older tweets in the next iteration.
                    prev = tweet.id
    
        except Exception:
            print 'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX - ERROR!! - ([twitter_patternPkg_connector] getTweetSecureLoad)'
            print 'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX - ERROR!!   (film: ' + oneSubject +')' 
            pass
        
        # Create a .csv in pattern/examples/01-web/
        # table.save(pd("OD_CK1_Source4_Tweeter_InitialLoad.csv"))
        print "CLASS (Twitter_PatternPKG) - Total Secure Twitter Load: " +  str(len(table)) + '\n'
        #print json.dumps(tweet_list)
        
        # return tweet_list
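        # A possible follow-up (not in the original code): tweet_list_Json holds one JSON
        # string per tweet, so the whole load could also be written out as a JSON array.
        # The file name below is made up for illustration.
        # with open(pd('OD_CK1_Source4_Tweeter_InitialLoad.json'), 'w') as fh:
        #     fh.write('[' + ','.join(tweet_list_Json) + ']')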
        return tweet_list_Json
Example #54
0
from pattern.web import Twitter, plaintext
twitter = Twitter(language='en') 
for tweet in twitter.search('"@snowden"', cached=False):
    print plaintext(tweet.text)
Example #55
0
File: 04-twitter.py Project: Neo-X/pattern
    index = set()

engine = Twitter(language="en")

# With Twitter.search(cached=False), a "live" request is sent to Twitter:
# we get the most recent results instead of those in the local cache.
# Keeping a local cache can also be useful (e.g., while testing)
# because a query is instant when it is executed the second time.
prev=None
groups = 1
for i in range(groups):
    # 49.253000,-123.111432,25mi
    # results = engine.search("#feelthebern", start=prev, count=100, cached=False, date='2016-02-14', geo=(latitude, longitude, radius))
    # results = engine.search("geocode:49.253000,-123.111432,50mi", start=prev, count=100, cached=False)
    # results = engine.search("#SingleLifeIn3Words", start=prev, count=100, cached=False, date='2016-02-14')
    results = engine.search("#Valentines OR #ValentinesDay", start=prev, count=100, cached=False, date='2016-02-15', since='2016-02-13')
    for tweet in results:
        print
        # print str(tweet.text)
        print tweet.author
        print tweet.date
        print hashtags(tweet.text) # Keywords in tweets start with a "#".
        print
        # Only add the tweet to the table if it doesn't already exist.
        if len(table) == 0 or tweet.id not in index:
            # remove new lines
            tweet.text = tweet.text.replace("\n", "")
            # tweet.raw = unicode(tweet.raw).encode('utf8').replace("\n", "")
            tweet.raw = json.dumps(tweet.raw, separators=(',', ': ')).replace("\n", "")
            table.append([tweet.id, tweet.text, tweet.date, tweet.language, tweet.shares, tweet.geo, tweet.geo_lat, tweet.geo_long, tweet.user_id, tweet.location, tweet.statuses_count, tweet.followers_count, tweet.friends_count, tweet.raw])
            index.add(tweet.id)
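        # Remember the last tweet id so that, with groups > 1, the next pass would fetch
        # older tweets instead of repeating the same page (an addition, not in the original).
        prev = tweet.id

# Persisting what was collected is left out of this fragment; a hedged sketch
# (the file name is made up for illustration):
# table.save("valentines_tweets.csv")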
Example #56
0
class Ui_Dialog(object):
    def setupUi(self, Dialog):
        Dialog.setObjectName("Dialog")
        Dialog.resize(823, 677)
        self.label = QtGui.QLabel(Dialog)
        self.label.setGeometry(QtCore.QRect(10, 10, 800, 400))
        self.label.setFrameShape(QtGui.QFrame.WinPanel)
        self.label.setText("")
        self.label.setObjectName("label")
        self.listWidget = QtGui.QListWidget(Dialog)
        self.listWidget.setGeometry(QtCore.QRect(10, 470, 801, 192))
        self.listWidget.setObjectName("listWidget")
        self.widget = QtGui.QWidget(Dialog)
        self.widget.setGeometry(QtCore.QRect(10, 429, 801, 25))
        self.widget.setObjectName("widget")
        self.horizontalLayout = QtGui.QHBoxLayout(self.widget)
        self.horizontalLayout.setContentsMargins(0, 0, 0, 0)
        self.horizontalLayout.setObjectName("horizontalLayout")
        self.label_2 = QtGui.QLabel(self.widget)
        self.label_2.setObjectName("label_2")
        self.horizontalLayout.addWidget(self.label_2)
        self.lineEdit = QtGui.QLineEdit(self.widget)
        self.lineEdit.setObjectName("lineEdit")
        self.horizontalLayout.addWidget(self.lineEdit)
        self.pushButton = QtGui.QPushButton(self.widget)
        self.pushButton.setObjectName("pushButton")
        self.horizontalLayout.addWidget(self.pushButton)

        self.retranslateUi(Dialog)
        QtCore.QMetaObject.connectSlotsByName(Dialog)
        #
        self.pushButton.clicked.connect(self.on_buttom_pressed)
        self.listWidget.doubleClicked.connect(self.goTweet)

        #
        self.alText = u''
        self.fullText = u''
        self.twitter = Twitter(language='tr')
        self.prevId = None
        self.timer = QtCore.QTimer(Dialog)
        self.timer.timeout.connect(self.on_timer)
        self.dialog = Dialog
        self.twIds = []


    def retranslateUi(self, Dialog):
        Dialog.setWindowTitle(QtGui.QApplication.translate("Dialog", "Twitter Gözetleyici", None, QtGui.QApplication.UnicodeUTF8))
        self.label_2.setText(QtGui.QApplication.translate("Dialog", "Anahtar Kelime :", None, QtGui.QApplication.UnicodeUTF8))
        self.pushButton.setText(QtGui.QApplication.translate("Dialog", "Gözetle", None, QtGui.QApplication.UnicodeUTF8))
    #
    def on_buttom_pressed(self):
        if self.timer.isActive() :
            self.timer.stop()
            self.pushButton.setText(u'Gözetle')
        else:
            self.listWidget.clear()
            self.twIds = []
            self.fullText = u''
            self.on_timer()
            self.timer.start(60000)
            self.pushButton.setText('Durdur !')

        return

    def on_timer(self):
        searchKey = self.lineEdit.text()
        self.getTwits(searchKey)
        self.filterWords()
        self.fullText = self.fullText + self.alText
        self.showWordCloud()


    def showWordCloud(self):
        wordcloud = WordCloud(width=800, height=400).generate(self.fullText)
        img = np.array(wordcloud.to_image())
        height, width, byteValue = img.shape
        byteValue = byteValue * width
        image = QtGui.QImage(img.data, width, height, byteValue, QtGui.QImage.Format_RGB888)
        pxmp = QtGui.QPixmap(image)
        self.label.setPixmap(pxmp)

    def filterWords(self):
        # filter out frequently occurring (stop) words; the list is admittedly incomplete
        flt = [u'https', u'nin', u'bir', u'daha', u'diye', u'için', u'gibi', u'işte', u'ile', u'değil', u'ben', u'sen',
               u'çok', u'ama', u'Sen',u'den',u'htt']
        derle = re.compile("\w*", re.UNICODE)
        wL = re.findall(derle, self.alText)
        temp = []
        for w in wL:
            if len(w) < 3:
                continue
            elif w in flt:
                continue
            else:
                #print w
                temp.append(w)
        self.alText = ' '.join(temp)

    def getTwits(self,keyWord):
        if len(keyWord) == 0:
            keyWord =u'"gündem"'
            self.lineEdit.setText(keyWord)
        self.alText = u''
        try:
            tList = self.twitter.search(keyWord, start=self.prevId, count=10, cached=False)
        except:
            # Most likely the Twitter search rate limit: warn the user and return
            # so the loop below never touches an undefined tList.
            message = "Twitter Aram Limiti Lütfen Biraz Bekleyin"
            QtGui.QMessageBox.information(self.dialog, "Information", message)
            return

        for tweet in tList:
            self.listWidget.addItem(QtGui.QListWidgetItem(cleanTweet(tweet.text)))
            self.twIds.append(tweet.id)
            self.listWidget.setCurrentRow(self.listWidget.count()-1)
            tweet.text = self.filterRT(tweet.text)
            tweet.text = self.filterLink(tweet.text)
            self.alText = self.alText + plaintext(tweet.text) + u' '
            self.prevId = tweet.id

    def filterRT(self,tweet):
        # strip the leading 'RT @user:' prefix from retweets
        buf = tweet[:2]
        if buf == u'RT':
            ix = tweet.find(':')
            tweet = tweet[ix:]
        return tweet

    def filterLink(self,tweet):
        regex = r'https?://[^\s<>"]+|www\.[^\s<>"]+'
        match = re.search(regex, tweet)
        buf = tweet
        if match:
            ixs= tweet.find(match.group())
            ixe= len(match.group())
            try:
                buf = tweet[:ixs]
            except:
                print "not removed"
        return buf

    def goTweet(self):
        i = self.listWidget.currentRow()
        urlTw = 'https://twitter.com/statuses/' + str(self.twIds[i])
        webbrowser.open(urlTw)
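# A minimal launcher for the dialog above (not part of the original snippet; it assumes
# the same PyQt4 modules the class already relies on, i.e. QtCore/QtGui from PyQt4).
if __name__ == "__main__":
    import sys
    from PyQt4 import QtGui

    app = QtGui.QApplication(sys.argv)
    dialog = QtGui.QDialog()
    ui = Ui_Dialog()
    ui.setupUi(dialog)
    dialog.show()
    sys.exit(app.exec_())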
Example #57
0
"""Scrapes tweets from Twitter and svaes the results in a dictionary that is the pickled"""

from pattern.en import *
from pattern.web import Twitter
import pickle
MY_LICENSE = ('WgRmLC6IAhx27bRIG54ngxaRp', 'ldjhjaWF2G6jtPlg3mudc1IZV0V7PN7YZaSjuDqlw7QpvwF7ra', ('700461301575905284-PMu8wIBN2Qt1dW2T1nrytKjC0GYPgF3', 'OszrgU2gVUyBuNAmQc70CAARcpbqvu26DKwEKE0lAQ1ZG'))

# creates dictionary with weather conditions (ex. #snow) as keys and a list of 1000 tweet strings as the value

dictionary = {}
weather_conditions = ['#snow', '#rain', '#cold', '#storm', "#blizzard", '#sun', '#warm', '#drizzle', '#cloudy']
t = Twitter(license = MY_LICENSE)

for hashtag in weather_conditions:
	dictionary[hashtag] = []
	for tweet in t.search(hashtag, start = None, count = 1000):
		dictionary[hashtag].append(tweet.text)

# pickles the tweet dictionary

f = open('weather2.pickle', 'wb')  # binary mode so the pickle round-trips on any platform
pickle.dump(dictionary, f)
f.close()
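# Reading the pickle back (a quick sanity check, not part of the original script):
f = open('weather2.pickle', 'rb')
weather_tweets = pickle.load(f)
f.close()
for hashtag in weather_tweets:
    print hashtag, len(weather_tweets[hashtag])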
Example #58
0
##############################################################################################
##############################################################################################
##############################################################################################
from pattern.web import Twitter
#number of tickers to search
N=len(tickers)
#number of tweets to download
M=2000
# DF0 collects one dict per tweet (a plain list of dicts rather than an actual DataFrame)
DF0=[]
#loop
t = Twitter()
for j in range(N):
    tick='$'+tickers[j]
    i = None
    for tweet in t.search(tick, start=i, count=M):
#        temp_text=re.sub('[,;"\'?():_`/\.]','',tweet.text)
#        temp_text=temp_text.strip()
        temp_text=tweet.text.strip()
        temp_text=temp_text.replace('\n',' ')  # assign the result; str.replace is not in-place
        DF0.append({'id':tweet.id,'tickers':tick,'screen_name':tweet.author,'text':temp_text,'time':tweet.date})
#        print tweet.text
        i = tweet.id

DF2=DF0  # note: this is an alias of DF0, not a copy

for i in range(len(DF2)):
#    DF2[i]['text']=DF2[i]['text'].encode('utf-8')
#    DF2[i]['text'].encode('utf-8')
    DF2[i]['text']=DF2[i]['text'].replace(r"\\",'')  # assign the result; str.replace is not in-place
    DF2[i]['text']=DF2[i]['text'].replace('\n','')
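# A possible follow-up (pandas is an assumption here; it is not imported anywhere in the
# original snippet): turn the list of per-tweet dicts into an actual DataFrame.
import pandas
df = pandas.DataFrame(DF2, columns=['id', 'tickers', 'screen_name', 'text', 'time'])
print df.head()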
from pattern.web import Crawler
from pattern.web import download
from pattern.web import plaintext
from textblob import TextBlob

from pattern.web import Twitter

t = Twitter()
i = None
for j in range(4):
    for tweet in t.search('#SuperBowl', start=i, count=40):
       # print tweet.id
       # print tweet.name
        print tweet.text
       # print tweet.coordinates

        blob = TextBlob(plaintext(tweet.text))
        print blob.noun_phrases  # noun phrases for the whole tweet
        for sentence in blob.sentences:
            print(sentence.sentiment.polarity)
        print
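        # Whole-tweet sentiment (an addition, not in the original): TextBlob also exposes a
        # polarity score for the entire blob, averaged over its sentences.
        print blob.sentiment.polarity
        i = tweet.id  # remember the last id so the next outer pass fetches older tweets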