def on_data(self, dataTwitter):
    """Route an incoming tweet either to the AI bot or to processTweet.

    Strips the bot mention and control hashtags from the text, then
    dispatches based on the #TalkToSteve hashtag. Sets the module-level
    dataCheck flag so callers know a tweet was handled.
    """
    global dataCheck
    # BUG FIX: the original called json.loads(dataTwitter) three separate
    # times for the same payload; parse once and reuse.
    tweet = json.loads(dataTwitter)
    text = tweet['text'].replace("@ArmHackathonBot ", "")
    is_talk_to_steve = "#TalkToSteve" in text
    text = text.replace("#PublicOpinion", "").replace("#TalkToSteve", "")
    name = tweet['user']['name']
    nameTwitter = tweet['user']['screen_name']
    if is_talk_to_steve:
        AIBot.query(text)
    else:
        processTweet(text, name, nameTwitter)
    dataCheck = True
    return True
def json_2_csv(file_path, bound_boxes, translate=None, period=None, shape_local=None):
    """Convert every .txt file of line-delimited tweet JSON under file_path to CSV.

    Tweets without any geolocation (neither 'geo' nor 'coordinates') are
    dropped. One .csv is written next to each .txt, with columns SAVE_PAR.
    """
    delete_csv_file(file_path)  # remove any stale csv output first
    for dir_name, _subdirs, file_list in os.walk(file_path, topdown=True):
        print('Found directory: %s' % dir_name)
        for fname in file_list:
            if not fname.endswith(".txt"):
                continue
            print('\t%s' % fname)
            # BUG FIX: use os.path.join — plain `dirName + fname` breaks
            # when the directory path has no trailing separator.
            txt_path = os.path.join(dir_name, fname)
            with open(txt_path, 'r') as file:
                print('Creating a csv file...')
                camp_list = []
                for line in file:
                    row = json.loads(line)
                    # BUG FIX: if 'geo' was present but null, the original
                    # elif never checked 'coordinates'; accept either field.
                    if row.get("geo") is not None or row.get("coordinates") is not None:
                        camp_list.append(select_fields(row, False))
            csv_path = os.path.join(dir_name, fname.replace(".txt", ".csv"))
            df_twitter = pd.DataFrame(camp_list, columns=SAVE_PAR)
            df_twitter.to_csv(csv_path, sep=",")
    print("Great, we converted the tweet data to csv file")
def cal_sentiment_analysis():
    """Print TextBlob sentiment polarity for up to ~50 tweets from
    followers_tweets_data.txt (one JSON document per line)."""
    count = 0
    # BUG FIX: in Python 3, str has no .decode() — the original
    # str(tweet["text"]).decode("utf-8") raised AttributeError on every
    # tweet. Open with an explicit encoding instead, and use a context
    # manager so the file is always closed.
    with open('followers_tweets_data.txt', 'r', encoding='utf-8') as file_text:
        try:
            for line in file_text:
                try:
                    count += 1
                    tweet = json.loads(line)
                    print("==========")
                    print(tweet["text"])
                    tweet_text = str(tweet["text"])
                    if tweet_text.strip():
                        clean_tweet_text = clean_data(tweet_text)
                        text_blob = TextBlob(clean_tweet_text)
                        print(clean_tweet_text)
                        print(text_blob.sentiment.polarity)
                        print("==========")
                    if count > 50:
                        break
                except Exception:
                    # BUG FIX: the original raised before printing, making
                    # the message unreachable; report and continue like the
                    # sibling readers do.
                    print("Internal Exception")
        except Exception:
            print("General Exception")
            raise
def on_data(self, data):
    """Append each streamed tweet as one JSON line to today's dated file.

    Returns True to keep streaming until final_colect_time passes, then
    prints the final time and returns False to stop the stream.
    """
    if final_colect_time > tm.time():
        try:
            # One file per day, named by ISO date, opened in append mode.
            with open(dir + date.today().isoformat() + '.txt', 'a') as f:
                new_data = json.loads(data)
                # Only tweets with some geo hint (place/coordinates/geo).
                if (new_data['place'] is not None
                        or new_data['coordinates'] is not None
                        or new_data['geo']):
                    basic_filter(new_data)
                    serialized = json.dumps(new_data)
                    print(serialized)
                    f.write(serialized)
                    f.write('\n')
                # BUG FIX: removed the redundant f.close() — the with
                # statement already closes the file on exit.
        except BaseException as e:
            error_handler(e, bound_boxes, track, dir, ckey, csecret, atoken,
                          asecret, initial_colect_time, final_colect_time, time)
            print("Error on_data : %s" % str(e))
        return True
    else:
        print("Final time:" + tm.asctime(tm.localtime(final_colect_time)))
        return False
def on_data(self, data):
    """Clean and sentiment-tag each streamed tweet, storing non-retweets.

    Tweets flagged as retweets (or containing 'RT @') are ignored.
    """
    tweet = json.loads(data)
    filtered_tweet = {}
    try:
        # PERF FIX: check the retweet condition first — the original ran
        # the regex cleanup and TextBlob sentiment analysis even for
        # retweets that were then discarded.
        if tweet["retweeted"] or 'RT @' in tweet["text"]:
            return
        # Remove mentions, stray special characters, and links.
        filtered_tweet["text"] = ' '.join(
            re.sub(r"(@[A-Za-z0-9]+)|([^0-9A-Za-z \t]) |(\w+:\/\/\S+)",
                   " ", tweet["text"]).split())
        analysis = TextBlob(filtered_tweet["text"])
        polarity = analysis.sentiment.polarity
        if polarity > 0:
            filtered_tweet["sentiment"] = "positive"
        elif polarity == 0:
            filtered_tweet["sentiment"] = "neutral"
        else:
            filtered_tweet["sentiment"] = "negative"
        db.tweets.insert(filtered_tweet)
    except KeyError:
        # Control messages lack the expected keys; skip them silently.
        pass
def read_hashtags():
    """Count hashtag occurrences across tweets in followers_tweets_data.txt.

    Prints every hashtag seen more than 100 times, the number of distinct
    hashtags, and the total number of tweets read.
    """
    hashtags_dict = {}
    count = 0
    # BUG FIX: the original did str(text.encode("utf-8")), which in
    # Python 3 produces a "b'...'" repr string and corrupts non-ASCII
    # hashtags; open with an explicit encoding and use the text directly.
    # The context manager also closes the previously-leaked file handle.
    with open('followers_tweets_data.txt', 'r', encoding='utf-8') as file_text:
        try:
            for line in file_text:
                try:
                    count += 1
                    tweet = json.loads(line)
                    tweet_text = str(tweet["text"])
                    if tweet_text.strip():
                        for item in re.findall(r'\B#\w*[a-zA-Z]+\w*', tweet_text):
                            tag = item.replace("#", "")
                            hashtags_dict[tag] = hashtags_dict.get(tag, 0) + 1
                except Exception:
                    print("Internal Exception")
        except Exception:
            print("General Exception")
    for key, value in hashtags_dict.items():
        if value > 100:
            print(key, value)
    print("Total hashtags", len(hashtags_dict))
    print("Total Tweets", count)
def read_tweet_text():
    """Accumulate each user's tweet text into the global user_tweets_dict.

    Keys are user id strings; values are the concatenation of that user's
    tweet texts from followers_tweets_data.txt.
    """
    count = 0
    # BUG FIX: the file handle was never closed; use a context manager.
    with open('followers_tweets_data.txt', 'r', encoding="utf-8") as file_text:
        try:
            for line in file_text:
                try:
                    count += 1
                    tweet = json.loads(line)
                    tweet_text = str(tweet["text"])
                    tweet_user_id = str(tweet["user"]["id"])
                    if tweet_text:
                        user_tweets_dict[tweet_user_id] = (
                            user_tweets_dict.get(tweet_user_id, "") + tweet_text)
                except Exception:
                    print("Internal Exception")
        except Exception:
            print("General Exception")
    print("Reading tweets done")
def on_data(self, data):
    """Queue tweets whose text matches one of the tracked keywords."""
    tweet = json.loads(data)
    # BUG FIX: stream control messages (delete/limit notices) carry no
    # 'text' key and crashed the listener with KeyError; skip them.
    if 'text' not in tweet:
        return True
    for key in self.keywords:
        if re.search(key, tweet['text'], re.IGNORECASE):
            twit = Tweet(tweet['id'], key,
                         tweet['text'].encode('ascii', 'ignore'),
                         tweet['timestamp_ms'], 0)
            self.queue.put(twit)
    return True
def on_data(self, data):
    """Extract significant words from a streamed tweet and record them.

    Skips the tracked term itself, stop words, URLs, and one-character
    tokens; everything kept is batched into a single MongoDB update.
    Returns False (stopping the stream) if anything goes wrong.
    """
    try:
        payload = json.loads(data)
        # Control messages carry no 'text'; nothing to do for them.
        if payload.get('text', 0):
            records = []
            for token in str(payload.get('text')).split():
                lowered = token.lower()
                if (len(token) > 1 and lowered != self.term
                        and lowered not in self.stopWords
                        and not token.startswith('http')):
                    records.append(
                        self.create_mongo_message(self.city, self.term, token))
            self.update_mongodb(records)
    except Exception as e:
        print(e)
        return False
    return True
def convert_for_dictionary(ArquivoJSON):
    """Parse an open JSON-lines file into a list of dicts, one per line."""
    return [json.loads(row) for row in ArquivoJSON.readlines()]
def on_data(self, data):
    """Tokenize a streamed tweet, drop noise words, and push the rest to MongoDB.

    Tokens containing the Unicode ellipsis (truncated text), stop words,
    the tracked term, links, and single characters are all skipped.
    Returns False to stop the stream on unexpected errors.
    """
    try:
        self.cnt += 1
        payload = json.loads(data)
        if payload.get('text'):
            records = []
            for token in str(payload.get('text')).split():
                if '…' in token:
                    continue
                lowered = token.lower()
                if (len(token) > 1 and lowered != self.term
                        and lowered not in self.stopWords
                        and not token.startswith('http')):
                    print(token.translate(self.translator))
                    records.append(self.create_mongo_message(self.term, token))
            self.update_mongodb(records)
    except Exception as e:
        print(e)
        return False
    return True
def on_data(self, data):
    """Store the raw decoded tweet document in the MongoDB tweets collection."""
    document = json.loads(data)
    try:
        db.tweets.insert(document)
    except KeyError:
        # Malformed documents are skipped silently.
        pass
def on_data(self, tweet):
    """Print every streamed tweet, dispatching the tracked account's own tweets.

    Tweets authored by the user id in the module-level `userid` are handed
    to gottweet(); everything else is only printed.
    """
    parsed = json.loads(tweet)
    if parsed['user']['id'] == userid:
        print("Official Tweet: " + stringify(parsed))
        gottweet(parsed)
    else:
        print("Other: " + stringify(parsed))
    return True
def on_data(self, raw_data):
    """Track hashtag frequencies from the stream in a Redis sorted set.

    Every token starting with '#' increments its score in the 'tweets'
    sorted set (leading '#' stripped).
    """
    parsed = json.loads(raw_data)
    if 'text' in parsed:
        print(parsed['text'])
        for word in parsed['text'].split():
            if word.startswith("#"):
                # Strip only the leading '#' before scoring the tag.
                self.redis.zincrby(name="tweets", value=word.replace("#", "", 1))
                print("Tag: %s" % word)
    return True
def on_data(self, data):
    """Queue tweets whose text matches one of the tracked keywords."""
    tweet = json.loads(data)
    # BUG FIX: stream control messages (delete/limit notices) carry no
    # 'text' key and crashed the listener with KeyError; skip them.
    if 'text' not in tweet:
        return True
    for key in self.keywords:
        if re.search(key, tweet['text'], re.IGNORECASE):
            twit = Tweet(tweet['id'], key,
                         tweet['text'].encode('ascii', 'ignore'),
                         tweet['timestamp_ms'], 0)
            self.queue.put(twit)
    return True
def on_data(self, data):
    """Record one follower's screen name and fetch their profile.

    Keeps streaming (True) until 1000 users have been collected, then
    returns False to stop the stream.
    """
    username = json.loads(data)['user']['screen_name']
    print(data)
    # The original while/else executed at most one iteration because of the
    # unconditional return; a plain if/else expresses the same behavior.
    if len(list_users) < 1000:
        list_users.append(username)
        get_user(username, location=location)
        return True
    return False
def populatingTweetsTable(game):
    """Load the tweets archived for `game` and insert them into the tweets table."""
    with open(f'TweetsPerGame/{game}.json', 'r') as f:
        for raw in f:
            tweet = json.loads(raw)  # one JSON document per line
            user = tweet['user']
            addTweet(game,
                     tweet['created_at'], tweet['id_str'], tweet['text'],
                     user['id_str'], user['name'], user['screen_name'],
                     user['location'], user['description'], user['lang'],
                     tweet['lang'])
def on_data(self, data):
    """Collect up to 20 follower screen names, newest first.

    Returns True while collecting, False once 20 users have been stored.
    """
    username = json.loads(data)['user']['screen_name']
    print(username)
    # The original while/else with a counter ran at most one iteration and
    # always inserted at position 0; this if/else is behaviorally identical.
    if len(list_users) < 20:
        list_users.insert(0, username)
        return True
    return False
def on_data(self, data):
    """Print streamed tweet text and reply with a test tweet when it contains 'the'."""
    decoded = json.loads(data)
    txt = decoded['text']
    if "the" in txt:
        self.sendTweet("test")
    print("\n" + txt)
    return True
def on_data(self, data):
    """Collect up to _max_tweets English tweets into the tweet store.

    Returns True to keep streaming; False once the limit is reached or the
    listener is no longer alive.
    """
    if self._count < self._max_tweets and self._alive:
        tweet = json.loads(data)
        if tweet['lang'] == 'en':
            self.tweet_store.add_tweet(
                {'guid': self._count,
                 'id': tweet['id'],
                 'text': tweet['text'],
                 'query': self._query_terms})
            self._count += 1
        return True
    else:
        # BUG FIX: the original used a Python 2 print statement, which is a
        # syntax error under Python 3 (the rest of this file uses print()).
        print('Reached tweet limit ... shutdown')
        return False
def on_data(self, data):
    """Persist each streamed tweet into the configured database table.

    Errors are reported but the stream is kept alive (always returns True).
    """
    try:
        parsed = json.loads(data)
        creatingTable(databaseTable)  # ensure the table exists before insert
        user = parsed['user']
        addTweet(databaseTable,
                 parsed['created_at'], parsed['id_str'], parsed['text'],
                 user['id_str'], user['name'], user['screen_name'],
                 user['location'], user['description'], user['lang'],
                 parsed['lang'])
        return True
    except BaseException as e:
        print("Error on_data: %s" % str(e))
        return True
def save_tweets_metadata():
    """Append per-tweet sentiment and hashtags to MetaData/tweets_sentiment.txt.

    Each output line is: user_id <TAB> polarity [<TAB> hashtag ...].
    Also tallies hashtags, printing any seen more than 100 times plus
    overall totals.
    """
    hashtags_dict = {}
    count = 0
    # BUG FIX: both file handles were never closed; context managers
    # guarantee cleanup even when an exception escapes.
    with open('MetaData/tweets_sentiment.txt', 'a') as meta_data, \
            open('followers_tweets_data.txt', 'r', encoding="utf-8") as file_text:
        try:
            for line in file_text:
                try:
                    count += 1
                    tweet = json.loads(line)
                    tweet_text = str(tweet["text"])
                    tweet_user_id = str(tweet["user"]["id"])
                    if len(tweet_text) != 0:
                        text_blob = TextBlob(tweet_text)
                        meta_data.write(tweet_user_id + "\t")
                        meta_data.write("%s" % str(text_blob.sentiment.polarity))
                        for item in re.findall(r'\B#\w*[a-zA-Z]+\w*', tweet_text):
                            tag = str(item).replace("#", "")
                            meta_data.write("\t" + tag)
                            hashtags_dict[tag] = hashtags_dict.get(tag, 0) + 1
                        meta_data.write("\n")
                except Exception:
                    print("Internal Exception")
        except Exception:
            print("General Exception")
    for keys, value in hashtags_dict.items():
        if value > 100:
            print(keys, value)
    print("Total hashtags", len(hashtags_dict))
    print("Total Tweets", count)
def on_data(self, data):
    """Filter out non-tweet stream messages and publish real tweets to Redis."""
    obj = json.loads(data)
    if "limit" in obj:
        # Rate-limit notice from the stream, not a tweet.
        logging.warning(obj)  # BUG FIX: logging.warn is a deprecated alias
        return True
    if "user" not in obj:
        # Not a tweet (e.g. delete/status notice).
        logging.warning(obj)
        return True
    tweet_id = str(obj.get("id_str"))
    self.publish_to_redis(data, tweet_id)
    return True
def on_data(self, data):
    """Filter out non-tweet stream messages and publish real tweets to Redis."""
    obj = json.loads(data)
    if "limit" in obj:
        # Rate-limit notice from the stream, not a tweet.
        logging.warning(obj)  # BUG FIX: logging.warn is a deprecated alias
        return True
    if "user" not in obj:
        # Not a tweet (e.g. delete/status notice).
        logging.warning(obj)
        return True
    tweet_id = str(obj.get("id_str"))
    self.publish_to_redis(data, tweet_id)
    return True
def on_data(self, data):
    """Tokenize each streamed tweet and insert tokens not already tracked."""
    global track_words
    global words
    tweet = json.loads(data)
    # BUG FIX: stream control messages (delete/limit) carry no 'text' key;
    # the original tweet['text'] lookup raised KeyError on them.
    text = tweet.get('text')
    if not text:
        return True
    print(text)
    for token in tokenize_tweet_text(text):
        if token not in track_words:
            print('inserting word ', token)
            insert_word(token)
    return True
def on_data(self, data):
    """Run sentiment analysis on a tweet; append confident labels to a file."""
    all_data = json.loads(data)
    tweet = all_data["text"]
    sentiment_value, confidence = sentiment.sentiment(tweet)
    print(tweet, "\n", sentiment_value, confidence)
    if confidence * 100 > min_confidence:
        # BUG FIX: use a context manager instead of manual open/close so
        # the file is closed even if a write fails.
        with open(os.getcwd() + "/twitter-output.txt", "a") as output:
            output.write(sentiment_value)
            output.write("\n")
    return True
def on_data(self, raw_data):
    """For tweets from the target city, archive the tweet, the author's
    timeline, and the timelines of the author's friends."""
    json_data = json.loads(raw_data)
    # BUG FIX: 'place' is frequently null on the streaming API; the
    # original json_data["place"]["name"] raised TypeError on those tweets.
    place = json_data.get("place")
    if place and place.get("name") == city_name:
        storeJSON(json_data, self.db)
        user_id = json_data["user"]["id"]
        timeline_pages = get_timeline_pages(self.auth, user_id)
        if timeline_pages is not None:
            storeUserTimeline(timeline_pages, self.db)
        # Friend list and each friend's timeline.
        friend_ids = get_friend_ids(self.auth, user_id)
        if friend_ids is not None:
            for friend_id in friend_ids:
                friends_timeline_pages = get_timeline_pages(self.auth, friend_id)
                if friends_timeline_pages is not None:
                    storeFriendsTimeline(friends_timeline_pages, self.db)
    return True
def on_data(self, raw_data):
    """Forward tweet text to Kafka, preferring the extended (untruncated) form.

    Messages with neither 'extended_tweet' nor 'text' are ignored.
    """
    data = json.loads(raw_data)
    if "extended_tweet" in data:
        text = data["extended_tweet"]["full_text"]
    elif "text" in data:
        # BUG FIX: the original computed data["text"].lower() but then sent
        # the un-lowered data["text"], so the result was dead code; the
        # published bytes are unchanged here.
        text = data["text"]
    else:
        return
    producer.send(topic_name, text.encode("utf-8"))
def on_data(self, data):
    """Clean each streamed tweet and store it in MongoDB (clean_tweets.tweets).

    Stores both the cleaned text and the full decoded response.
    """
    try:
        # PERF FIX: reuse a single MongoClient per listener — the original
        # opened a brand-new connection for every incoming tweet.
        client = getattr(self, "_mongo_client", None)
        if client is None:
            client = MongoClient('localhost', 27017)
            self._mongo_client = client
        # clean_tweets is the database that stores the cleaned tweets.
        db = client.clean_tweets
        datajson = json.loads(data)
        tweet = datajson['text']
        clean_tweet = cleanTweet(tweet)
        document = {'text': clean_tweet, 'fullResponse': datajson}
        db.tweets.insert(document)
    except Exception as e:
        print(e)
def read_tweets_information_file():
    """Count tweets per user id in followers_tweets_data.txt and print totals."""
    user_dict = {}
    count = 0
    # BUG FIX: the file handle was never closed; use a context manager.
    with open('followers_tweets_data.txt', 'r') as file_text:
        try:
            for line in file_text:
                count += 1
                tweet = json.loads(line)
                uid = tweet["user"]["id"]
                user_dict[uid] = user_dict.get(uid, 0) + 1
        except Exception:
            print("General Exception")
    for keys, value in user_dict.items():
        print(keys, value)
    print("Total Users", len(user_dict))
    print("Total Tweets", count)
def on_data(self, data):
    """Reduce each streamed tweet to a compact record and persist it.

    Sleeps one second between stored tweets as crude rate limiting.
    """
    payload = data.decode('ascii', errors="replace")
    documents = json.loads('[' + payload + ']')
    for doc in [d for d in documents if "text" in d.keys()]:
        record = {}
        if doc["coordinates"] is not None:
            record['coordinates'] = doc['coordinates']['coordinates']
        if "hashtags" in doc["entities"].keys():
            record["hashtags"] = [h["text"] for h in doc["entities"]["hashtags"]]
        record['timestamp_ms'] = doc['timestamp_ms']
        record["retweet_count"] = doc["retweet_count"]
        record["favorite_count"] = doc["favorite_count"]
        record['text'] = doc['text']
        record["id_str"] = doc['id_str']
        record['user'] = {'id': doc['user']['id'], 'name': doc['user']['name']}
        print(record)
        self.store_tweet(record)
        time.sleep(1)
    return True
def on_data(self, data):
    """Insert tweet tokens for authors whose profile location matches `location`."""
    global location
    global track_words
    global words
    tweet = json.loads(data)
    # Author must have a non-empty profile location.
    if not ("user" in tweet and "location" in tweet["user"]
            and tweet["user"]["location"]):
        return True
    loc = tweet["user"]["location"]
    # BUG FIX: control messages without 'text' raised KeyError on
    # tweet['text']; use .get and skip them.
    text = tweet.get('text')
    if not text:
        return True
    if location.lower() in loc.lower():
        tokens = tokenize_tweet_text(text)
        print('location', loc, 'tokens', tokens)
        for token in tokens:
            if token not in track_words:
                insert_word(token, location)
    return True
def sentimentValue(tweets):
    """Query MeaningCloud for the tweets' sentiment and return the converted result."""
    response = sendRecieveMeaningCloud(tweets)
    payload = json.loads(response.content)
    return convertSentiment(payload)
def on_data(self, data):
    """Forward tweets whose author has a non-empty profile location to Mongo ingestion."""
    record = json.loads(data)
    if "user" in record and "location" in record["user"] and record["user"]["location"]:
        self.add_words_to_mongo(record)
    return True
def on_error(self, status):
    """Stream error callback: log the error and archive its payload.

    Returning True keeps the stream alive after the error.
    """
    # NOTE(review): tweepy's on_error typically receives an int HTTP status
    # code, which json.loads() would reject — confirm callers pass JSON text.
    print("Error on_data: %s" % str(status))
    tweets_collection.insert_one(json.loads(status))
    return True
if len(labeled) == 0: return "Hey!", 0 probable_label = max( labeled, key=itemgetter(1))[0][0] # get the most likely probability #tweet.update({"classified": str(probable_label).split(", ")[0]}) return str(probable_label).split(", ")[0], 1 drawing = start_new_thread(draw_bar_graph, ("classified", classified)) ssc = StreamingContext(sc, 10) dstream = ssc.socketTextStream("localhost", my_streamer.PORT) # Turn it into JSON tweets = dstream\ .map(lambda tweet_data: json.loads(tweet_data.decode('utf-8')))\ .map(tweet.santitize_tweet)\ .filter(tweet.only_images)\ .map(tweet.get_media_url)\ .map(classify_media)\ .reduceByKey(add)\ .foreachRDD(lambda time, rdd: classified.extend(rdd.collect())) # [Class, Num] #tweets.pprint() ssc.start() sleep(30) # Consume 30 seconds of data ssc.stop(stopSparkContext=True, stopGraceFully=True) # Give it time to kill Spark..., then close the socket.
def on_data(self, raw_data):
    """Score an incoming tweet against tracked keywords; retweet/favorite it.

    Builds a bag-of-words vector in the module-global `tweet_words` by
    tokenizing the tweet text against each key in `orderedkeys` (via
    TweetTokenization3.tweetToken), then compares it to `orderedvalues`
    with cosine similarity. Above `threshold2` the tweet is favorited AND
    retweeted; above `threshold1` it is only retweeted. Tweets from banned
    accounts or containing banned words are ignored. Always returns True
    (or None on early fall-through) so the stream stays alive.
    """
    #print (raw_data)
    global n
    try:
        json_load = json.loads(raw_data)
        tweet_text = json_load['text']
        # coded = texts.encode('utf-8')
        # s = str(tweet_text)
        # print(s)
        #tweet_text = raw_data.lower().split('"text":"')[1].split('","source":"')[0].replace(",", "")
        # NOTE(review): screen_name and tweet_cid are scraped out of the raw
        # JSON string by splitting — brittle; json_load already holds both
        # values. Confirm before changing, as fav()/retweet() may expect
        # exactly this string form of the id.
        screen_name = raw_data.lower().split('"screen_name":"')[1].split(
            '","location"')[0].replace(",", "")
        tweet_cid = raw_data.split('"id":')[1].split(
            '"id_str":')[0].replace(",", "")
        #print(tweet_text)
        n = 0
        # Reset the shared bag-of-words vector for this tweet.
        del tweet_words[:]
        for key in orderedkeys:
            tweet_words.append(0)
            #print(key)
            #print(n)
            # tweetToken updates tweet_words[n] as a side effect.
            TweetTokenization3.tweetToken(tweet_text, key)
            #print(tweet_words)
            # m += tweet_words[n]
            #print(m)
            n += 1
        #print(tweet_words)
        #print(orderedvalues)
        #print(len(tweet_words))
        #print(orderedvalues)
        #print(len(orderedvalues))
        #myvalues = np.asarray(values)
        #mytweet_words = np.asarray(tweet_words)
        #print(myvalues)
        #print(mytweet_words)
        #print(sum(tweet_words))
        # No keyword hits at all: nothing to score.
        if sum(tweet_words) == 0:
            return True
        cos_sim = cosine_similarity(tweet_words, orderedvalues)
        #print (cos_sim)
        #cos_sim = dot(mytweet_words, myvalues.T) / linalg.norm(mytweet_words) / linalg.norm(myvalues)
        accs = ['twitter', 'twittersupport']  # banned account screen names go here
        words = ['hate', 'derp']  # banned words go here
        if not any(acc in screen_name.lower() for acc in accs):
            if not any(word in tweet_text.lower() for word in words):
                # call what you want to do here
                # tweet(tweet_cid)
                # unfav(tweet_cid)
                # retweet(tweet_cid)
                # Strong match: favorite and retweet.
                if (cos_sim > threshold2):
                    print(tweet_text)
                    print(orderedkeys)
                    print(orderedvalues)
                    print(tweet_words)
                    print(cos_sim)
                    fav(tweet_cid)
                    retweet(tweet_cid)
                    print("favorites and retweet")
                    # # syntax need to be fixed here
                    return True
                # Weaker match: retweet only.
                if (cos_sim > threshold1):
                    print(tweet_text)
                    print(orderedkeys)
                    print(tweet_words)
                    print(orderedvalues)
                    print(cos_sim)
                    retweet(tweet_cid)
                    print("retweet")
                    # # syntax need to be fixed here
                    return True
    except Exception as e:
        print("tugba")
        # Prints the error message; comment it out if unwanted.
        print(str(e))
        pass
def on_messages_received(self, messages):
    """Decode a JSON payload and hand it to the internal processor."""
    decoded = json.loads(messages)
    self._process(decoded)
    return True
def on_messages_received(self, messages):
    """Parse the incoming JSON message batch and dispatch it for processing."""
    parsed = json.loads(messages)
    self._process(parsed)
    return True