Ejemplo n.º 1
0
 def on_data(self, dataTwitter):
     """Route one incoming tweet to the chat bot or the tweet processor.

     Args:
         dataTwitter: Raw JSON string of a tweet; must contain ``text`` and
             ``user`` (with ``name`` and ``screen_name``).

     Returns:
         Always ``True``.

     Side effects:
         Sets the module-level ``dataCheck`` flag to ``True``.
     """
     global dataCheck
     # Decode the payload once instead of re-parsing it for every field.
     payload = json.loads(dataTwitter)
     text = payload['text'].replace("@ArmHackathonBot ", "")
     # Remember the routing hashtag before it is stripped from the text.
     isTalkToSteve = "#TalkToSteve" in text
     text = text.replace("#PublicOpinion", "").replace("#TalkToSteve", "")
     user = payload['user']
     name = user['name']
     nameTwitter = user['screen_name']
     if isTalkToSteve:
         AIBot.query(text)
     else:
         processTweet(text, name, nameTwitter)
     dataCheck = True
     return True
Ejemplo n.º 2
0
def json_2_csv(file_path,
               bound_boxes,
               translate=None,
               period=None,
               shape_local=None):
    """Convert every tweet ``.txt`` file found under *file_path* to CSV.

    Each ``.txt`` file is read as JSON lines. Rows with a non-null ``geo``
    (or, when ``geo`` is absent, a non-null ``coordinates``) are passed to
    ``select_fields`` and written to a sibling ``.csv`` file with the
    ``SAVE_PAR`` columns.

    Args:
        file_path: Root directory to walk.
        bound_boxes: Not used by this function.
        translate: Not used by this function.
        period: Not used by this function.
        shape_local: Not used by this function.
    """
    delete_csv_file(file_path)  # delete the csv file if it exists
    for dirName, subdirList, fileList in os.walk(file_path, topdown=True):
        print('Found directory: %s' % dirName)
        for fname in fileList:
            if not fname.endswith(".txt"):
                continue
            print('\t%s' % fname)
            # os.walk yields directory names without a trailing separator,
            # so join the parts instead of concatenating them.
            source_path = os.path.join(dirName, fname)
            with open(source_path, 'r') as file:
                print('Creating a csv file...')

                camp_list = []
                for line in file:
                    if not line.strip():
                        # Blank lines are not JSON documents; skip them.
                        continue
                    row = json.loads(line)
                    # Drop tweets without geolocation.
                    if "geo" in row:
                        has_location = row['geo'] is not None
                    elif "coordinates" in row:
                        has_location = row['coordinates'] is not None
                    else:
                        has_location = False
                    if has_location:
                        camp_list.append(select_fields(row, False))

            file_name = os.path.join(dirName,
                                     str(fname).replace(".txt", ".csv"))
            df_Twitter = pd.DataFrame(camp_list, columns=SAVE_PAR)
            df_Twitter.to_csv(file_name, sep=",")

            print("Great, we converted the tweet data to csv file")
Ejemplo n.º 3
0
def cal_sentiment_analysis():
    """Print cleaned text and TextBlob polarity for tweets in the dump file.

    Reads ``followers_tweets_data.txt`` as JSON lines. Processing stops after
    a non-empty tweet has been handled once more than 50 lines were read.

    Raises:
        Exception: Any error raised while processing a line is re-raised
            after ``"General Exception"`` is printed.
    """
    count = 0
    # The context manager closes the file even when a line fails.
    with open('followers_tweets_data.txt', 'r', encoding="utf-8") as file_text:
        try:
            for line in file_text:
                count += 1
                tweet = json.loads(line)
                print("==========")
                print(tweet["text"])
                # json.loads already returns text; no byte decoding needed.
                tweet_text = str(tweet["text"])
                if tweet_text.strip():
                    clean_tweet_text = clean_data(tweet_text)
                    text_blob = TextBlob(clean_tweet_text)
                    print(clean_tweet_text)
                    print(text_blob.sentiment.polarity)

                    print("==========")
                    if count > 50:
                        break
        except Exception:
            print("General Exception")
            raise
Ejemplo n.º 4
0
 def on_data(self, data):
     """Append a located tweet to today's capture file while collecting.

     Args:
         data: Raw JSON string received from the stream.

     Returns:
         ``True`` while the current time is before ``final_colect_time``
         (even when handling the message failed), ``False`` afterwards.
     """
     if not (final_colect_time > tm.time()):
         print("Final time:" +
               tm.asctime(tm.localtime(final_colect_time)))
         return False

     try:
         # One file per calendar day; the context manager closes it.
         with open(dir + date.today().isoformat() + '.txt', 'a') as f:
             # Received as JSON, converted to a dict.
             new_data = json.loads(data)
             # Only keep tweets that carry some location information.
             if (new_data['place'] is not None
                     or new_data['coordinates'] is not None
                     or new_data['geo']):
                 # Filter step (could be another method of this class).
                 basic_filter(new_data)
                 # Stored on disk as one JSON document per line.
                 new_data = json.dumps(new_data)
                 print(new_data)
                 f.write(new_data)
                 f.write('\n')
     except BaseException as e:
         # NOTE(review): BaseException also traps KeyboardInterrupt and
         # SystemExit; kept because error_handler receives every failure.
         error_handler(e, bound_boxes, track, dir, ckey, csecret,
                       atoken, asecret, initial_colect_time,
                       final_colect_time, time)
         print("Error on_data : %s" % str(e))
     return True
Ejemplo n.º 5
0
    def on_data(self, data):
        """Store the cleaned text and sentiment label of original tweets.

        Retweets (``retweeted`` flag set or ``'RT @'`` in the text) are
        ignored. Payloads lacking ``text`` or ``retweeted`` are skipped.

        Args:
            data: Raw JSON string received from the stream.

        Returns:
            None.
        """
        tweet = json.loads(data)

        try:
            raw_text = tweet["text"]
            # Ignore retweets before doing any cleaning or analysis.
            if tweet["retweeted"] or 'RT @' in raw_text:
                return
        except KeyError:
            # Not a regular status payload; nothing to store.
            return

        filtered_tweet = {}

        # Clean tweet text (remove mentions, special characters and links).
        # The alternatives must not be separated by spaces: a stray space
        # would make "special character" only match when followed by one.
        filtered_tweet["text"] = ' '.join(
            re.sub(r"(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)",
                   " ", raw_text).split())

        # Sentiment analysis: evaluate the polarity once and label it.
        polarity = TextBlob(filtered_tweet["text"]).sentiment.polarity
        if polarity > 0:
            filtered_tweet["sentiment"] = "positive"
        elif polarity == 0:
            filtered_tweet["sentiment"] = "neutral"
        else:
            filtered_tweet["sentiment"] = "negative"

        # NOTE(review): if ``db`` is a PyMongo database, ``insert`` is the
        # legacy API; confirm the installed driver version still has it.
        db.tweets.insert(filtered_tweet)
Ejemplo n.º 6
0
def read_hashtags():
    """Count hashtag occurrences in ``followers_tweets_data.txt``.

    The file is read as JSON lines. Hashtags seen more than 100 times are
    printed, followed by the number of distinct hashtags and the number of
    lines read. A line that fails to parse prints ``"Internal Exception"``
    and is skipped.
    """
    hashtags_dict = {}
    count = 0
    # The context manager closes the file even when iteration fails.
    with open('followers_tweets_data.txt', 'r', encoding="utf-8") as file_text:
        try:
            for line in file_text:
                try:
                    count += 1
                    tweet = json.loads(line)

                    # Keep the text as str: wrapping encoded bytes in str()
                    # yields a "b'...'" repr that mangles non-ASCII tags.
                    tweet_text = str(tweet["text"])
                    if tweet_text.strip():
                        for item in re.findall(r'\B#\w*[a-zA-Z]+\w*',
                                               tweet_text):
                            item = item.replace("#", "")
                            hashtags_dict[item] = hashtags_dict.get(item, 0) + 1
                except Exception:
                    # Best effort: report the bad line and keep going.
                    print("Internal Exception")
        except Exception:
            print("General Exception")

    for keys, value in hashtags_dict.items():
        if value > 100:
            print(keys, value)

    print("Total hashtags", len(hashtags_dict))
    print("Total Tweets", count)
0
def read_tweet_text():
    """Accumulate tweet text per user id into ``user_tweets_dict``.

    Reads ``followers_tweets_data.txt`` as JSON lines and appends each
    non-empty tweet text to the entry keyed by the author's id (as a
    string). A line that fails prints ``"Internal Exception"`` and is
    skipped.
    """
    # The context manager closes the file even when iteration fails.
    with open('followers_tweets_data.txt', 'r', encoding="utf-8") as file_text:
        try:
            for line in file_text:
                try:
                    tweet = json.loads(line)

                    tweet_text = str(tweet["text"])
                    tweet_user_id = str(tweet["user"]["id"])
                    if tweet_text:
                        # Append for every tweet, not only the first one
                        # seen for this user.
                        user_tweets_dict[tweet_user_id] = (
                            user_tweets_dict.get(tweet_user_id, "")
                            + tweet_text)
                except Exception:
                    # Best effort: report the bad line and keep going.
                    print("Internal Exception")
        except Exception:
            print("General Exception")

    print("Reading tweets done")
0
 def on_data(self, data):
     """Queue a ``Tweet`` for every keyword pattern found in the text.

     Args:
         data: Raw JSON string received from the stream.

     Returns:
         Always ``True``.
     """
     tweet = json.loads(data)
     text = tweet.get('text')
     if text is None:
         # Stream messages without a 'text' field have nothing to match.
         return True
     for key in self.keywords:
         # Each keyword is used as a case-insensitive regular expression.
         if re.search(key, text, re.IGNORECASE):
             twit = Tweet(tweet['id'], key, text.encode('ascii', 'ignore'),
                          tweet['timestamp_ms'], 0)
             self.queue.put(twit)
     return True
Ejemplo n.º 9
0
    def on_data(self, data):
        """Tokenize a tweet and push one record per kept word to MongoDB.

        A word is kept when it is longer than one character, differs from
        ``self.term``, is not in ``self.stopWords`` and does not start with
        ``http``.

        Returns:
            ``True`` on success (including payloads without text),
            ``False`` when any exception occurred (it is printed).
        """
        try:
            payload = json.loads(data)
            twit_text = payload.get('text', 0)
            if twit_text:
                # Whitespace tokenization, then stop-word style filtering.
                records_list = [
                    self.create_mongo_message(self.city, self.term, token)
                    for token in str(twit_text).split()
                    if len(token) > 1
                    and token.lower() != self.term
                    and token.lower() not in self.stopWords
                    and not token.startswith('http')
                ]
                self.update_mongodb(records_list)
        except Exception as e:
            print(e)
            return False
        return True
Ejemplo n.º 10
0
def convert_for_dictionary(ArquivoJSON):
    """Decode a JSON-lines file object into a list of parsed objects.

    Args:
        ArquivoJSON: Open file-like object with one JSON document per line.

    Returns:
        A list holding the decoded value of every line, in file order.
    """
    return [json.loads(entry) for entry in ArquivoJSON.readlines()]
Ejemplo n.º 11
0
    def on_data(self, data):
        """Count the message, tokenize its text and store the kept words.

        Words containing an ellipsis character are skipped. Each remaining
        word is kept under the same rules as the filter below and printed
        after applying ``self.translator``.

        Returns:
            ``True`` on success (including payloads without text),
            ``False`` when any exception occurred (it is printed).
        """
        try:
            self.cnt += 1
            payload = json.loads(data)
            twit_text = payload.get('text')
            if twit_text:
                records_list = []
                for token in str(twit_text).split():
                    if '…' in token:
                        continue

                    keep = (len(token) > 1
                            and token.lower() != self.term
                            and token.lower() not in self.stopWords
                            and not token.startswith('http'))
                    if not keep:
                        continue

                    print(token.translate(self.translator))
                    records_list.append(
                        self.create_mongo_message(self.term, token))

                # Persist everything collected for this tweet.
                self.update_mongodb(records_list)
        except Exception as e:
            print(e)
            return False
        return True
Ejemplo n.º 12
0
    def on_data(self, data):
        """Decode one stream payload and insert it into ``db.tweets``.

        A ``KeyError`` raised by the insert is ignored; JSON decoding errors
        propagate to the caller.
        """
        document = json.loads(data)
        try:
            db.tweets.insert(document)
        except KeyError:
            # Deliberately ignored, matching the best-effort storage policy.
            pass
Ejemplo n.º 13
0
 def on_data(self, tweet):
     """Print every tweet and forward those authored by ``userid``.

     Returns:
         Always ``True``.
     """
     status = json.loads(tweet)
     if status['user']['id'] != userid:
         print("Other: " + stringify(status))
         return True
     print("Official Tweet: " + stringify(status))
     gottweet(status)
     return True
 def on_data(self, raw_data):
     """Print the tweet text and bump a Redis counter for each hashtag.

     Returns:
         Always ``True``.
     """
     payload = json.loads(raw_data)
     if 'text' not in payload:
         return True
     print(payload['text'])
     hashtags = [token for token in payload['text'].split()
                 if token.startswith("#")]
     for tag in hashtags:
         # Only the leading '#' is stripped from the stored member name.
         self.redis.zincrby(name="tweets", value=tag.replace("#", "", 1))
         print("Tag: %s" % tag)
     return True
Ejemplo n.º 15
0
 def on_data(self, data):
     """Queue a ``Tweet`` for every keyword pattern found in the text.

     Args:
         data: Raw JSON string received from the stream.

     Returns:
         Always ``True``.
     """
     tweet = json.loads(data)
     text = tweet.get('text')
     if text is None:
         # Stream messages without a 'text' field have nothing to match.
         return True
     for key in self.keywords:
         # Each keyword is used as a case-insensitive regular expression.
         if re.search(key, text, re.IGNORECASE):
             twit = Tweet(tweet['id'], key,
                          text.encode('ascii', 'ignore'),
                          tweet['timestamp_ms'], 0)
             self.queue.put(twit)
     return True
Ejemplo n.º 16
0
 def on_data(self, data):
     """Record the tweet author until ``list_users`` holds 1000 entries.

     Returns:
         ``True`` after recording the user, ``False`` once the list is full.
     """
     username = json.loads(data)['user']['screen_name']
     print(data)
     if len(list_users) >= 1000:
         return False
     list_users.append(username)
     get_user(username, location=location)
     return True
Ejemplo n.º 17
0
def populatingTweetsTable(game):
    """Load ``TweetsPerGame/<game>.json`` and add each tweet via ``addTweet``.

    The file is read as JSON lines; every line becomes one ``addTweet`` call
    with the tweet fields followed by the author fields and the tweet
    language.
    """
    tweet_keys = ('created_at', 'id_str', 'text')
    user_keys = ('id_str', 'name', 'screen_name', 'location', 'description',
                 'lang')
    with open(f'TweetsPerGame/{game}.json', 'r') as source:
        for raw_line in source:
            record = json.loads(raw_line)
            addTweet(game,
                     *[record[key] for key in tweet_keys],
                     *[record['user'][key] for key in user_keys],
                     record['lang'])
Ejemplo n.º 18
0
 def on_data(self, data):
     """Prepend the tweet author to ``list_users`` until it holds 20 names.

     Returns:
         ``True`` after storing the user, ``False`` once the list is full.
     """
     username = json.loads(data)['user']['screen_name']
     print(username)
     if len(list_users) < 20:
         # The insertion index is always 0, so new names go to the front.
         list_users.insert(0, username)
         return True
     return False
Ejemplo n.º 19
0
 def on_data(self, data):
     """Print the tweet text, sending a test tweet when it contains "the".

     Returns:
         Always ``True``.
     """
     txt = json.loads(data)['text']
     contains_marker = "the" in txt
     if contains_marker:
         self.sendTweet("test")
     print("\n" + txt)
     return True
Ejemplo n.º 20
0
    def on_data(self, data):
        """Store English tweets until the limit is reached or the listener stops.

        Every received message increments ``self._count``; only messages
        whose ``lang`` is ``'en'`` are added to ``self.tweet_store``.

        Args:
            data: Raw JSON string received from the stream.

        Returns:
            ``True`` while below ``self._max_tweets`` and alive, ``False``
            otherwise.
        """
        if self._count >= self._max_tweets or not self._alive:
            # Function-call form works on both Python 2 and Python 3.
            print('Reached tweet limit ... shutdown')
            return False

        tweet = json.loads(data)
        # .get avoids a KeyError on messages that carry no language field.
        if tweet.get('lang') == 'en':
            self.tweet_store.add_tweet({
                'guid': self._count,
                'id': tweet['id'],
                'text': tweet['text'],
                'query': self._query_terms,
            })

        self._count += 1
        return True
Ejemplo n.º 21
0
 def on_data(self, data):
     """Decode a tweet and add it to the ``databaseTable`` table.

     Errors are printed and do not stop the stream.

     Args:
         data: Raw JSON string received from the stream.

     Returns:
         Always ``True``.
     """
     try:
         tweet = json.loads(data)  # transform the tweet into a dict
         creatingTable(databaseTable)
         addTweet(databaseTable, tweet['created_at'], tweet['id_str'],
                  tweet['text'], tweet['user']['id_str'],
                  tweet['user']['name'], tweet['user']['screen_name'],
                  tweet['user']['location'], tweet['user']['description'],
                  tweet['user']['lang'], tweet['lang'])
     except Exception as e:
         # Exception (not BaseException) so Ctrl-C and SystemExit still
         # terminate the program.
         print("Error on_data: %s" % str(e))
     return True
Ejemplo n.º 22
0
def save_tweets_metadata():
    """Append per-tweet sentiment and hashtags to the metadata file.

    Reads ``followers_tweets_data.txt`` as JSON lines. For each non-empty
    tweet one tab-separated record is appended to
    ``MetaData/tweets_sentiment.txt``: user id, TextBlob polarity, then each
    hashtag. Afterwards hashtags seen more than 100 times are printed,
    followed by the distinct hashtag count and the number of lines read.
    """
    hashtags_dict = {}
    count = 0
    # Both files are closed (and the output flushed) on every exit path.
    with open('MetaData/tweets_sentiment.txt', 'a') as meta_data, \
            open('followers_tweets_data.txt', 'r',
                 encoding="utf-8") as file_text:
        try:
            for line in file_text:
                try:
                    count += 1
                    tweet = json.loads(line)

                    tweet_text = str(tweet["text"])
                    tweet_user_id = str(tweet["user"]["id"])
                    if not tweet_text:
                        continue

                    polarity = TextBlob(tweet_text).sentiment.polarity
                    fields = [tweet_user_id, "%s" % str(polarity)]

                    for item in re.findall(r'\B#\w*[a-zA-Z]+\w*', tweet_text):
                        item = str(item).replace("#", "")
                        fields.append(item)
                        hashtags_dict[item] = hashtags_dict.get(item, 0) + 1

                    # Write the record in one call so a failure above never
                    # leaves a partial line in the output file.
                    meta_data.write("\t".join(fields) + "\n")
                except Exception:
                    # Best effort: report the bad line and keep going.
                    print("Internal Exception")
        except Exception:
            print("General Exception")

    for keys, value in hashtags_dict.items():
        if value > 100:
            print(keys, value)

    print("Total hashtags", len(hashtags_dict))
    print("Total Tweets", count)
Ejemplo n.º 23
0
    def on_data(self, data):
        """Publish tweets to Redis; log and skip non-tweet messages.

        Messages containing a ``limit`` key or lacking a ``user`` key are
        logged as warnings and not published.

        Args:
            data: Raw JSON string received from the stream.

        Returns:
            Always ``True``.
        """
        obj = json.loads(data)

        if "limit" in obj or "user" not in obj:
            # logging.warn is a deprecated alias; use logging.warning.
            logging.warning(obj)
            return True

        tweet_id = str(obj.get("id_str"))
        # The raw payload is published unchanged, keyed by the tweet id.
        self.publish_to_redis(data, tweet_id)
        return True
Ejemplo n.º 24
0
    def on_data(self, data):
        """Publish tweets to Redis; log and skip non-tweet messages.

        Messages containing a ``limit`` key or lacking a ``user`` key are
        logged as warnings and not published.

        Args:
            data: Raw JSON string received from the stream.

        Returns:
            Always ``True``.
        """
        obj = json.loads(data)

        if "limit" in obj or "user" not in obj:
            # logging.warn is a deprecated alias; use logging.warning.
            logging.warning(obj)
            return True

        tweet_id = str(obj.get("id_str"))
        # The raw payload is published unchanged, keyed by the tweet id.
        self.publish_to_redis(data, tweet_id)
        return True
Ejemplo n.º 25
0
    def on_data(self, data):
        """Insert every token of the tweet that is not a tracked word.

        Returns:
            Always ``True``; tweets with empty text are skipped.
        """
        global track_words
        global words
        tweet = json.loads(data)
        text = tweet['text']
        if not text:
            return True
        print(text)

        for token in tokenize_tweet_text(text):
            if token in track_words:
                continue
            print('inserting word ', token)
            insert_word(token)
        return True
Ejemplo n.º 26
0
    def on_data(self, data):
        """Classify a tweet and log confident sentiment labels to a file.

        The label is appended to ``twitter-output.txt`` in the current
        working directory when ``confidence * 100`` exceeds
        ``min_confidence``.

        Args:
            data: Raw JSON string received from the stream.

        Returns:
            Always ``True``.
        """
        all_data = json.loads(data)

        tweet = all_data["text"]
        sentiment_value, confidence = sentiment.sentiment(tweet)

        print(tweet, "\n", sentiment_value, confidence)

        if confidence * 100 > min_confidence:
            # The context manager closes the file even if a write fails.
            with open(os.getcwd() + "/twitter-output.txt", "a") as output:
                output.write(sentiment_value)
                output.write("\n")

        return True
Ejemplo n.º 27
0
 def on_data(self, raw_data):
     """Store tweets from ``city_name`` and crawl the author's friends.

     When the tweet's place name equals ``city_name`` the tweet and the
     author's timeline are stored. The timelines of the author's friends
     are fetched and stored for every tweet.

     Args:
         raw_data: Raw JSON string received from the stream.

     Returns:
         Always ``True``.
     """
     json_data = json.loads(raw_data)
     # "place" can be null or absent; treat both as "no match".
     place = json_data.get("place")
     if place and place.get("name") == city_name:
         storeJSON(json_data, self.db)
         timeline_pages = get_timeline_pages(self.auth,
                                             json_data["user"]["id"])
         if timeline_pages is not None:
             storeUserTimeline(timeline_pages, self.db)

     # Friend list and their timelines.
     # NOTE(review): this runs for every tweet, not only city matches;
     # confirm that is intended.
     friend_ids = get_friend_ids(self.auth, json_data["user"]["id"])
     if friend_ids is not None:
         for friend_id in friend_ids:
             friends_timeline_pages = get_timeline_pages(self.auth, friend_id)
             if friends_timeline_pages is not None:
                 storeFriendsTimeline(friends_timeline_pages, self.db)

     return True
Ejemplo n.º 28
0
    def on_data(self, raw_data):
        """Send the tweet text to Kafka as UTF-8 bytes.

        The extended tweet's ``full_text`` is preferred over ``text``.
        Messages carrying neither are ignored.

        Args:
            raw_data: Raw JSON string received from the stream.

        Returns:
            None.
        """
        data = json.loads(raw_data)

        if "extended_tweet" in data:
            text = data["extended_tweet"]["full_text"]
        elif "text" in data:
            text = data["text"]
        else:
            return

        # Put the message into Kafka.
        producer.send(topic_name, text.encode("utf-8"))
Ejemplo n.º 29
0
    def on_data(self, data):
        """Clean a streamed tweet and store it in ``clean_tweets.tweets``.

        The stored document holds the cleaned text under ``text`` and the
        full decoded payload under ``fullResponse``. Any error is printed
        and otherwise ignored.

        Args:
            data: Raw JSON string received from the stream.

        Returns:
            None.
        """
        try:
            # Reuse one client per listener instead of opening (and never
            # closing) a new connection for every message.
            client = getattr(self, '_mongo_client', None)
            if client is None:
                client = MongoClient('localhost', 27017)
                self._mongo_client = client
            # clean_tweets is the database used to store the tweets.
            db = client.clean_tweets
            # Decode the JSON from Twitter.
            datajson = json.loads(data)
            # Apply the cleaning function to the tweet text only.
            clean_tweet = cleanTweet(datajson['text'])
            # Document that will be saved in the database.
            pyObject = {'text': clean_tweet, 'fullResponse': datajson}

            # Insert the streamed data into the 'tweets' collection.
            db.tweets.insert(pyObject)
        except Exception as e:
            print(e)
Ejemplo n.º 30
0
def read_tweets_information_file():
    """Print how many tweets each user has in ``followers_tweets_data.txt``.

    The file is read as JSON lines. Reading stops at the first line that
    fails (``"General Exception"`` is printed); the counts gathered so far
    are still reported, followed by the user total and the lines read.
    """
    user_dict = {}
    count = 0
    # The context manager closes the file even when a line fails.
    with open('followers_tweets_data.txt', 'r', encoding="utf-8") as file_text:
        try:
            for line in file_text:
                count += 1
                tweet = json.loads(line)
                user_id = tweet["user"]["id"]
                user_dict[user_id] = user_dict.get(user_id, 0) + 1
        except Exception:
            print("General Exception")

    for keys, value in user_dict.items():
        print(keys, value)

    print("Total Users", len(user_dict))
    print("Total Tweets", count)
    def on_data(self, data):
        """Extract a trimmed record from each tweet in the chunk and store it.

        ``data`` is decoded as ASCII (bad bytes replaced) and parsed as a
        comma-separated sequence of JSON documents. Documents without a
        ``text`` key are ignored. The method sleeps one second after each
        stored tweet.

        Returns:
            Always ``True``.
        """
        decoded = data.decode('ascii', errors="replace")
        documents = json.loads('[' + decoded + ']')
        with_text = [entry for entry in documents if "text" in entry.keys()]
        for doc in with_text:
            tweet = {}

            # Optional fields first, so they lead the stored record.
            if doc["coordinates"] is not None:
                tweet['coordinates'] = doc['coordinates']['coordinates']
            if "hashtags" in doc["entities"].keys():
                tweet["hashtags"] = [
                    tag["text"] for tag in doc["entities"]["hashtags"]
                ]

            tweet.update({
                'timestamp_ms': doc['timestamp_ms'],
                "retweet_count": doc["retweet_count"],
                "favorite_count": doc["favorite_count"],
                'text': doc['text'],
                "id_str": doc['id_str'],
                'user': {'id': doc['user']['id'],
                         'name': doc['user']['name']},
            })
            print(tweet)
            self.store_tweet(tweet)
            time.sleep(1)
        return True
    def on_data(self, data):
        """Insert untracked tokens of tweets whose author matches ``location``.

        Tweets without a non-empty user location or with empty text are
        skipped. The match is a case-insensitive substring test of
        ``location`` within the author's location.

        Returns:
            Always ``True``.
        """
        global location
        global track_words
        global words
        tweet = json.loads(data)
        has_location = ("user" in tweet
                        and "location" in tweet["user"]
                        and tweet["user"]["location"])
        if not has_location:
            return True
        loc = tweet["user"]["location"]

        text = tweet['text']
        if not text:
            return True

        if location.lower() not in loc.lower():
            return True

        tokens = tokenize_tweet_text(text)
        print('location', loc, 'tokens', tokens)
        for token in tokens:
            if token not in track_words:
                insert_word(token, location)
        return True
Ejemplo n.º 33
0
def sentimentValue(tweets):
    """Return the converted sentiment for *tweets*.

    Sends *tweets* through ``sendRecieveMeaningCloud``, decodes the JSON
    body of the response and passes it to ``convertSentiment``.
    """
    response = sendRecieveMeaningCloud(tweets)
    return convertSentiment(json.loads(response.content))
 def on_data(self, data):
     """Forward tweets whose author has a non-empty location to MongoDB.

     Returns:
         Always ``True``.
     """
     message = json.loads(data)
     located = ("user" in message
                and "location" in message["user"]
                and message["user"]["location"])
     if located:
         self.add_words_to_mongo(message)
     return True
Ejemplo n.º 35
0
 def on_error(self, status):
     """Print a stream error and record it in ``tweets_collection``.

     Args:
         status: Error value reported by the stream. A JSON object string
             is stored as decoded; anything else (for example a numeric
             status code) is stored under a ``status`` key.

     Returns:
         Always ``True``.
     """
     print("Error on_data: %s" % str(status))
     record = None
     if isinstance(status, (str, bytes, bytearray)):
         try:
             record = json.loads(status)
         except ValueError:
             # Not JSON: fall back to wrapping the raw value below.
             record = None
     if not isinstance(record, dict):
         # insert_one needs a mapping, so wrap scalars and other values.
         record = {"status": status}
     tweets_collection.insert_one(record)
     return True
Ejemplo n.º 36
0
    if len(labeled) == 0:
        return "Hey!", 0
    probable_label = max(
        labeled, key=itemgetter(1))[0][0]  # get the most likely probability

    #tweet.update({"classified": str(probable_label).split(", ")[0]})
    return str(probable_label).split(", ")[0], 1


# Start draw_bar_graph in a separate thread, handing it the shared
# `classified` list that the streaming job fills below.
drawing = start_new_thread(draw_bar_graph, ("classified", classified))
# Streaming context on the existing SparkContext; 10 is presumably the batch
# interval in seconds (TODO confirm against the StreamingContext signature).
ssc = StreamingContext(sc, 10)
# Text stream read from the local socket served on my_streamer.PORT.
dstream = ssc.socketTextStream("localhost", my_streamer.PORT)

# Pipeline: decode each record as JSON, sanitize it, keep only tweets with
# images, extract the media URL, classify it, sum the counts per key, and
# append every batch's collected results to `classified`.
# NOTE(review): foreachRDD is an output operation, so `tweets` is likely
# None rather than a DStream; confirm before re-enabling pprint below.
tweets = dstream\
    .map(lambda tweet_data: json.loads(tweet_data.decode('utf-8')))\
    .map(tweet.santitize_tweet)\
    .filter(tweet.only_images)\
    .map(tweet.get_media_url)\
    .map(classify_media)\
    .reduceByKey(add)\
    .foreachRDD(lambda time, rdd: classified.extend(rdd.collect()))
# Each collected element has the shape [Class, Num].

#tweets.pprint()

# Run the job for a fixed window, then shut Spark down gracefully.
ssc.start()
sleep(30)  # Consume 30 seconds of data
ssc.stop(stopSparkContext=True, stopGraceFully=True)

# Give it time to kill Spark..., then close the socket.
Ejemplo n.º 37
0
    def on_data(self, raw_data):
        """Retweet (and favourite) tweets similar enough to the keyword profile.

        The tweet is scored against ``orderedkeys`` by filling the shared
        ``tweet_words`` vector, and the cosine similarity with
        ``orderedvalues`` decides the action: above ``threshold2`` the tweet
        is favourited and retweeted, above ``threshold1`` it is only
        retweeted. Tweets from banned accounts or containing banned words
        are left alone.

        Args:
            raw_data: Raw JSON string received from the stream.

        Returns:
            ``True`` when the tweet scored zero or an action was taken,
            otherwise ``None`` (including when an exception was printed).
        """
        global n
        try:
            json_load = json.loads(raw_data)
            tweet_text = json_load['text']
            # Take the author and id from the decoded payload instead of
            # string-splitting the raw JSON, which depends on key order and
            # spacing. The values keep their previous form: a lower-cased
            # screen name and the id as a string.
            screen_name = json_load['user']['screen_name'].lower()
            tweet_cid = str(json_load['id'])

            # Rebuild the shared per-keyword vector for this tweet.
            # presumably tweetToken updates tweet_words[n]; verify in
            # TweetTokenization3 before changing the loop order.
            n = 0
            del tweet_words[:]
            for key in orderedkeys:
                tweet_words.append(0)
                TweetTokenization3.tweetToken(tweet_text, key)
                n += 1

            if sum(tweet_words) == 0:
                # No keyword matched; nothing to score.
                return True
            cos_sim = cosine_similarity(tweet_words, orderedvalues)

            accs = ['twitter', 'twittersupport'
                    ]  # banned account screen names go in here
            words = ['hate', 'derp']  # banned words go in here

            if not any(acc in screen_name for acc in accs):
                if not any(word in tweet_text.lower() for word in words):
                    if (cos_sim > threshold2):
                        print(tweet_text)
                        print(orderedkeys)
                        print(orderedvalues)
                        print(tweet_words)
                        print(cos_sim)
                        fav(tweet_cid)
                        retweet(tweet_cid)
                        print("favorites and retweet")
                        return True

                    if (cos_sim > threshold1):
                        print(tweet_text)
                        print(orderedkeys)
                        print(tweet_words)
                        print(orderedvalues)
                        print(cos_sim)

                        retweet(tweet_cid)
                        print("retweet")
                        return True

        except Exception as e:
            print("tugba")
            # Prints the error message; comment out if not wanted.
            print(str(e))
0
    def on_messages_received(self, messages):
        """Decode a JSON message batch and pass it to ``self._process``.

        Returns:
            Always ``True``.
        """
        self._process(json.loads(messages))
        return True
Ejemplo n.º 39
0
    def on_messages_received(self, messages):
        """Decode a JSON message batch and pass it to ``self._process``.

        Returns:
            Always ``True``.
        """
        self._process(json.loads(messages))
        return True