Example #1
	def selectClassesLimit(self, limit):
		tweetsPositivos = []
		tweetsNegativos = []

		# Fetch positive tweets; bind the limit instead of string-formatting
		# it into the SQL (placeholder style assumes a %s-based DBAPI driver)
		cur = self.__conn.cursor()
		sql = """SELECT id, tokens, original, classe, emojis FROM tweets WHERE classe='positive' AND tokens != '' LIMIT %s"""
		cur.execute(sql, (limit,))
		rows = cur.fetchall()
		cur.close()
		for row in rows:
			tweet = Tweet(row[0], row[1], row[2], row[3], row[4])
			tweetsPositivos.append(tweet)

		# Fetch negative tweets
		cur = self.__conn.cursor()
		sql = """SELECT id, tokens, original, classe, emojis FROM tweets WHERE classe='negative' AND tokens != '' LIMIT %s"""
		cur.execute(sql, (limit,))
		rows = cur.fetchall()
		cur.close()
		for row in rows:
			tweet = Tweet(row[0], row[1], row[2], row[3], row[4])
			tweetsNegativos.append(tweet)
		return tweetsPositivos, tweetsNegativos
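A minimal usage sketch for the method above; the TweetDAO class name and the psycopg2 connection are assumptions, not part of this excerpt:

import psycopg2

# Hypothetical wiring: any wrapper class that stores the connection as
# self.__conn and exposes selectClassesLimit would work the same way.
conn = psycopg2.connect("dbname=tweets")
dao = TweetDAO(conn)
positivos, negativos = dao.selectClassesLimit(500)
print(len(positivos), len(negativos))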
Example #2
    def save_tweet(self, tweet_dict):
        """Saves one tweet object to ES in the index of name tweet-index-<YEAR>-<#WEEK>"""
        INDEX_NAME = self.get_tweet_index()

        tweet_dict = self.purge_key_deep(tweet_dict, 'media')
        tweet_dict = self.purge_key_deep(tweet_dict, 'urls')
        tweet_dict = self.purge_key_deep(tweet_dict, 'url')
        
        time_str = tweet_dict.get('created_at', '')
        if time_str:
            tweet_dict['created_at'] = datetime.datetime.strptime(time_str, 
                                        "%a %b %d %H:%M:%S +0000 %Y")
        else:
            tweet_dict['created_at'] = None

        tweet_dict.update({u'_id': tweet_dict.get(u'id', "00000"), 
                           u'_index': INDEX_NAME})        
        tweet = Tweet(**tweet_dict)
        res = tweet.save()
        self.tweet_per_follower_count += 1
        if res:
            self.logger.info(
                " Success: tweet #%s for user_id: %s saved in index: %s tweet_id: %s",
                self.tweet_per_follower_count, tweet_dict['user']['id'],
                INDEX_NAME, tweet_dict['id'])
        else:
            self.logger.error(
                " Failure: tweet #%s for user_id: %s not saved in index: %s tweet_id: %s",
                self.tweet_per_follower_count, tweet_dict['user']['id'],
                INDEX_NAME, tweet_dict['id'])
Example #3
def run_numerai_batch():

    # Read configuration properties (raw string avoids backslash escapes)
    config = configparser.ConfigParser()
    config.read(r'C:\etc\properties.ini')

    # Numerai credentials for submission
    public = config['numerai']['public']
    secret = config['numerai']['secret']

    # Compute the tournament number
    week = time.strftime("%U")
    contest = str(int(week) + 245)

    print("\n Numerai Contest..." + contest)

    # Work out directory and create if not exists
    directory = 'F:\\Numerai\\numerai' + contest + '\\'

    first = "FALSE"

    if not os.path.exists(directory):

        first = "TRUE"

        # Make new Dir
        os.makedirs(directory)

        # download dataset from numerai, save it and then load it
        nx.download(directory + 'numerai_dataset.zip')

        # Unzip it
        with ZipFile(directory + 'numerai_dataset.zip', 'r') as zipObj:

            #Extract all the contents of zip file in current directory
            zipObj.extractall(directory)

    # Run my xg boost algo on it
    rvalue = str(mechXg.main(contest))
    #rvalue = str(0.041)

    #if not first == "TRUE" :
    if not first == "FALSE":

        # Tweet
        print("Tweeting ..")
        Tweet.tweetSomething(
            'Happy New Year! I am uploading submission for numer.ai [' +
            contest + '] with correlation [' + rvalue + '] ')

        # Upload to numerai
        print("Uploading")
        names = ('nomi', )
        for name in names:
            nxj.upload(directory + name + '_submission.csv', name, public,
                       secret)

    print("All Done")
Example #4
 def test_save_and_load_multiple_tweets(self):
     usr = TwitterUser('@POTUS', 'white house', 'president of the united states', 'June 2009', '14k', '120')
     twt1 = Tweet('Donald Trump', usr.username, usr.user_id, '2020-02-01T06:33:23.000Z', 'Rocketman!', 120, 3929, '',
                  300, 'image url', 'URL')
     twt2 = Tweet('Donald Trump', usr.username, usr.user_id, '2020-03-01T08:33:23.000Z', 'NO!', 120, 3929, '',
                  300, 'image url', 'URL')
     twt_list = TweetList('donny')
     twt_list.add_tweet(twt1)
     twt_list.add_tweet(twt2)
     DA = TweetDA()
     DA.save_list(twt_list)
     load_list = DA.load_all(username='******')
     self.assertEqual(twt_list, load_list)
Example #5
    def getNextTweet(self):
        tweet = Tweet()
        line = self.f.readline()
        if line == "":
        #    print "Missing line:" 
            print self.tweets
            self.f.close()
            return None
        try:
            o = json.loads(line)
        except json.decoder.JSONDecodeError as e:
            print("Problematic JSON string:")
            print(line)
            print(e.args)
            self.problems += 1
            return None

       
        # Extract GPS: try 'geo' tag, fallback to 'location' tag
        try:
            if o['doc']['geo'] != None:
                (tweet.lat, tweet.lon) = o['doc']['geo']['coordinates']
                self.GPStweets += 1
            else:
                try:
                    tweet.location = o['doc']['location']
                    match = self.rexLatLon.search(tweet.location)
                    if bool(match):
                        self.GPStweets += 1
                        (tweet.lat, tweet.lon) = float(match.group('lat')), float(match.group('lon'))
                except KeyError:
                    print("Location Key Error")
            self.tweets += 1
            if self.tweets % 100000 == 0:
                print("Tweets so far: " + str(self.tweets))

            #tweet.WRONGuserID = o['from_user_id']
            tweet.userName = o['doc']['from_user']
            tweet.text = o['doc']['text'].encode("utf-8")
            tweet.createdAt = o['doc']['created_at']
            tweet.profile_image = o['doc']['profile_image_url']
            tweet.msgID = int(o['doc']['id'])
            #tweet.sentiment = float(o['doc']['sentiment'])
            #tweet.json = line.strip()
            tweet.datetime = datetime.strptime(tweet.createdAt, "%a, %d %b %Y %H:%M:%S +0000")
            return (tweet, line)
        except KeyError:
            print "KeyError:"
            print line # o
            print "TWEETS"
            print self.tweets
            return "Err"
Example #6
File: sleep.py Project: Kiwibird59/Sleepy
def create_tweet_objects():

    tweets_json = open(sys.argv[1])
    tweet_list = []
    t_text = None
    t_coord = None
    t_datetime = None
    t_lang = None
    AFINN_DICT = create_afinn_dict()
    
    # Parse and collect all non-derived attributes of a tweet, e.g. text, datetime, language
    for jtweet in tweets_json:
        ptweet = json.loads(jtweet)
        if 'text' in ptweet:
            # Scan the tweet for trigger words ('sleep', 'sleeping', 'slept'),
            # compute a sentiment score, and extract other metadata.
            word_list_raw = ptweet['text'].split()
            word_list = [word.strip('!@#$%^&*,?.').lower() for word in word_list_raw]

            if 'sleep' in word_list or 'sleeping' in word_list or 'slept' in word_list:
                t_score = sum(AFINN_DICT[word] for word in word_list if word in AFINN_DICT)
            
                try:
                    t_coord = ptweet['coordinates']['coordinates']
                except (KeyError, TypeError):
                    t_coord = None

                t_datetime = ptweet.get('created_at')
                t_lang = ptweet.get('lang')

                try:
                    t_hash = ptweet['entities']['hashtags']
                except (KeyError, TypeError):
                    t_hash = None
            
                tweet_object = Tweet(ptweet['text'], t_coord, t_datetime, t_lang, t_hash) #creating tweet object
                #Calculating derived attributes
                tweet_object.calculate_raw_score(word_list, AFINN_DICT)
                tweet_object.calculate_phase(MOON_DICT_13, MOON_PHASE)
                #Adding tweet_object to a list
                tweet_list.append(tweet_object)
    return tweet_list
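create_afinn_dict is not shown in this excerpt. Assuming it loads the standard AFINN-111 lexicon (one term, a tab, and an integer score from -5 to +5 per line), a sketch could look like this:

def create_afinn_dict(path='AFINN-111.txt'):
    # Hypothetical loader; the original implementation lives elsewhere.
    scores = {}
    with open(path) as f:
        for line in f:
            term, score = line.rsplit('\t', 1)  # terms may contain spaces
            scores[term] = int(score)
    return scores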
Example #7
def getTweets(username, nTweets):
    auth = OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    api = tweepy.API(auth)

    # Gets the past 20 previous tweets
    new_tweets = api.user_timeline(screen_name=username, count=nTweets)

    tweets = []
    tweetsCSV = [tweet.text for tweet in new_tweets]
    for x in tweetsCSV:
        # unicode_escape turns non-ASCII characters into literal backslash
        # escapes (stripped by the regex below); decode back to str for demojize
        tweets.append(str(emoji.demojize(x.encode('unicode_escape').decode('ascii'))))

    dateCSV = [tweet.created_at for tweet in new_tweets]

    allData = {}  # Key =  Date, Value = tweet (Object from Tweet.py)
    for i in range(0, nTweets):
        try:
            text = re.sub(r'https:.*$', '', tweets[i])  # Removes all URLs
            text = re.sub(r'(\\[a-zA-Z0-9]+)', '', text)  # Removes escaped emojis
            allData[dateCSV[i]] = Tweet.Tweet(text, getMentions(text),
                                              getHashTags(text))
        except IndexError:
            break  # Fewer tweets than requested: end of this person's timeline
    return allData
Example #8
def getTweets(api):
	tweets = set()
	for _ in range(180):
		thisIterTweets = makeTwitterRequest(api)
		thisIterTweets = [Tweet.Tweet(tweet) for tweet in thisIterTweets]	# convert to Tweet Object
		tweets |= set(thisIterTweets)
	return tweets
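The set-based deduplication above only works if Tweet.Tweet is hashable with value semantics. A sketch of the contract the class would need to satisfy (hypothetical; the real class lives in Tweet.py):

class Tweet:
    def __init__(self, status):
        self.id = status.id
        self.text = status.text

    # Hash and compare by tweet id so set() drops duplicates correctly.
    def __hash__(self):
        return hash(self.id)

    def __eq__(self, other):
        return isinstance(other, Tweet) and self.id == other.id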
Example #9
    def load_tweets(self, path_dataset):
        
        entities = os.listdir(path_dataset)
        
        for entity in entities:
            
            path_file_entity = os.path.join(path_dataset, entity)
            tweets = []

            with open(path_file_entity, 'r') as tweets_file:
                
                for tweet in tweets_file:
                    
                    info_tweet = tweet.split('\t')
                    
                    # Fields: id, author, entity_code, lang, timestamp, corpus
                    id_tweet = info_tweet[0]
                    author = info_tweet[1]
                    entity_code = info_tweet[2]
                    lang = info_tweet[3]
                    timestamp = info_tweet[4]
                    corpus = info_tweet[5]
                    
                    tweets.append(tw.Tweet(id_tweet, author, entity_code, lang, timestamp, corpus))
                
            self.__tweets[entity] = tweets
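Each entity file is expected to hold one tab-separated record per line, in the field order unpacked above. An illustrative record (values invented):

sample = "1234567890\tsome_user\tENT01\ten\t2018-07-30 12:00:00\ttweet text\n"
info_tweet = sample.split('\t')
# -> ['1234567890', 'some_user', 'ENT01', 'en', '2018-07-30 12:00:00', 'tweet text\n']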
Example #10
def make_tweet(mytweets):
    #Get user name 
    author = input('What is your name? ')

    #Get tweet
    text = input('What would you like to tweet? ')

    #Check length of user_tweet is under 140 characters
    while len(text) > 140:
        print('Tweets can only be 140 characters')
        print()
        text = input('What would you like to tweet? ')

    #only save file if text is less than 140 characters
    #to prevent records with empty tweets
    if len(text) <= 140:
        #Create a tweet object named tweet-entry
        tweet_entry = Tweet.Tweet(author, text)

        #Add tweet_entry to beginning of mytweets list    
        mytweets.insert(0, tweet_entry)

        #Save the mytweets list to a file
        save_tweets(mytweets)
    
        print(author, ', your tweet has been saved.', sep='')    
Example #11
def getTweets(username):
	auth = OAuthHandler(consumer_key, consumer_secret)
	auth.set_access_token(access_token, access_token_secret)
	api = tweepy.API(auth)

	# Gets the past 20 previous tweets
	new_tweets = api.user_timeline(id=username)

	tweets = []
	tweetsCSV = [tweet.text for tweet in new_tweets]
	for x in tweetsCSV:
		try:
			tweets.append(emojiPattern.remove_emoji(x))
		except Exception:
			continue	# Skip tweets whose emoji cleanup fails

	dateCSV = [tweet.created_at for tweet in new_tweets] 

	allData = {} # Key =  Date, Value = tweet (Object from Tweet.py)
	for i in range(len(tweetsCSV)):
		try:
			text = str(tweetsCSV[i])
			print("Text: {} | Mentions: {} | Hashtags: {}".format(text, getMentions(text), getHashTags(text)))
			allData[dateCSV[i]] = Tweet.Tweet(text, getMentions(text), getHashTags(text))
		except Exception:
			continue
	return allData
Example #12
def readXML(xmlFile):
    tree = ET.parse(xmlFile)
    root = tree.getroot()

    tweets = []

    for tweet in root.iter('tweet'):
        content = tweet.find('content').text

        sentiments = tweet.find('sentiments')
        polarity = sentiments[0].find('value').text

        polarity = polarityTagging(polarity)
        # polarity = polarityTagging3(polarity)

        # Other info:
        tweet_id = int(tweet.find('tweetid').text)
        user = tweet.find('user').text
        date = tweet.find('date').text
        lang = tweet.find('lang').text

        if content is not None:
            tweet = tw.Tweet(tweet_id, user, date, lang, content, polarity)

            tweets.append(tweet)

    return tweets
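The element lookups above imply roughly this document shape (a minimal reconstruction; the real corpus may carry more fields):

sample = """<tweets>
  <tweet>
    <tweetid>1</tweetid>
    <user>someone</user>
    <date>2018-01-01</date>
    <lang>es</lang>
    <content>example text</content>
    <sentiments><polarity><value>P</value></polarity></sentiments>
  </tweet>
</tweets>"""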
Example #13
    def test_save_and_load_tweet(self):
        usr = TwitterUser('@POTUS', 'white house', 'president of the united states', 'June 2009', '14k', '120')
        twt = Tweet('Donald Trump', usr.username, usr.user_id, '2020-02-01T06:33:23.000Z', 'Rocketman!', 120, 3929, '',
                    300, 'image url', 'URL')

        DA = TweetDA()
        DA.save_tweet(twt)
        load_twt = DA.load_tweet(twt.tweet_id)
        self.assertEqual(twt, load_twt)
Example #14
 def initWithFilteredData(self, data):
     self.id = data['id']
     self.name = data['name']
     for tweetID in data['tweets']:
         tweet_data = data['tweets'][str(tweetID)]
         tweetObj = Tweet.Tweet(tweet_data)
         self.tweets[tweet_data['id']] = tweetObj
         self.tweets_count += 1
         self.urls_count += tweetObj.getAttr('urls_count')
Example #15
	def selectAllNoEmoji(self):
		cur = self.__conn.cursor()
		cur.execute(""" SELECT id, tokens, original, classe, emojis FROM tweets WHERE tokens != '' and classe='NoEmoji' """)
		rows = cur.fetchall()
		cur.close()
		tweets = []
		for row in rows:
			tweet = Tweet(row[0],row[1],row[2],row[3], row[4])
			tweets.append(tweet)
		return tweets	
Example #16
 def initWithRawData(self, data):
     self.id = data[0]['id']
     self.name = data[0]['name']
     # Indices 0-2 are skipped (data[0] holds the id/name header)
     for i in range(3, len(data)):
         tweetObj = Tweet.Tweet(data[i])
         self.tweets[data[i]['id']] = tweetObj
         self.tweets_count += 1
         self.urls_count += tweetObj.getAttr('urls_count')
Example #17
def termometer():
    '''Return, within the interval of 175, every hate occurrence and its type (img || txt)'''
    if request.method == 'GET':
        tweets = getTweets()
        response = []
        for tweet in tweets:
            t = Tweet(tweet[0], tweet[1], tweet[2], tweet[3], tweet[4])
            response.append(t)
            print(t)

    return jsonify('nudity')
Example #18
 def _parse_csv(self, fname):
     # Helper that converts a scraped CSV into a TweetList
     df = pd.read_csv(fname)
     tweets = TweetList('output')
     #create a tweetlist from this file
     for i, row in df.iterrows():
         twt = Tweet(row['UserScreenName'], row['UserName'], self.user.user_id, row['Timestamp'], row['Text'],
                         row['Likes'], row['Retweets'], row['Emojis'], row['Comments'], row['Image link'],
                         row['Tweet URL'])
         tweets.add_tweet(twt)
     return tweets
Example #19
    def getNextTweet(self):
        tweet = Tweet()
        line = self.f.readline()
        if line == "":
            self.f.close()
            return None
        try:
            o = json.loads(line)
        except json.decoder.JSONDecodeError:
            print("Problematic JSON string:")
            print(line)
            self.problems += 1
            return None

        # Extract GPS: try 'geo' tag, fallback to 'location' tag
        o.update({'lat': None, 'lon': None})
        if o['geo'] is not None:
            self.GPStweets += 1
            (tweet.lat, tweet.lon) = o['geo']['coordinates']
            (o['lat'], o['lon']) = o['geo']['coordinates']
        else:
            try:
                tweet.location = o['location']
                match = self.rexLatLon.search(tweet.location)
                if bool(match):
                    self.GPStweets += 1
                    (tweet.lat, tweet.lon) = float(match.group('lat')), float(
                        match.group('lon'))
                    (o['lat'], o['lon']) = (tweet.lat, tweet.lon)
            except KeyError:
                print("Location Key Error")

        self.tweets += 1
        if self.tweets % 100000 == 0:
            print("Tweets so far: " + str(self.tweets))

        #tweet.WRONGuserID = o['from_user_id']
        tweet.userName = o['from_user']
        tweet.text = o['text'].encode("utf-8")
        tweet.createdAt = o['created_at']
        tweet.profile_image = o['profile_image_url']
        tweet.msgID = int(o['id'])
        tweet.json = line.strip()
        return (o, tweet, line)
Example #20
def test4(training_data, folds: int, portionOfDataSet: float,
          test_ratio: float):
    """ This is with more tokens 
    """
    X = []  # training data
    y = []  # Class labels. Disaster or not disaster
    Tweets = []
    print("Using {} % of data set...".format(portionOfDataSet * 100))
    print("Parsing text...")
    for i in range(int(len(training_data.index) * portionOfDataSet)):
        # Index the columns row by row (iloc) rather than passing the whole Series
        Tweets.append(
            Tweet.Tweet(training_data["statusid"].iloc[i],
                        training_data["ttext"].iloc[i]))
Example #21
    def get_tweets(self):

        tweet_objects = self.response_json["data"]
        tweets = []
        for obj in tweet_objects:
            tweets.append(
                Tweet.Tweet(
                    id=obj["id"],
                    text=obj["text"],
                    # TODO: decide whether to alter Tweet class
                    author='hypefury'))

        return tweets
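get_tweets assumes a Twitter API v2-style response whose "data" field is a list of tweet objects; the author field is hard-coded pending the TODO above. A minimal input shape, with invented values:

response_json = {
    "data": [
        {"id": "1", "text": "first tweet"},
        {"id": "2", "text": "second tweet"},
    ]
}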
Example #22
 def __init__(self, event, queue, results):
     train_dataset, test_dataset = Tweet.get_flattened_data('dataset/k4/training.json', 'dataset/k4/testing.json', 'dataset/k4/root_tweet.json', 4)
     multiprocessing.Process.__init__(self)
     self._event = event
     self._queue = queue
     self._results = results
     self.daemon = True
     self._tag_count = 2
     self._preprocessed_results = [json.load(open("dataset/NaiveBayesCascadeClassifier_results.json", 'r')),
                                   json.load(open("dataset/RandomForestCascadeClassifier_results.json", 'r')),
                                   json.load(open("dataset/knnClassifier_results.json", 'r')),
                                   json.load(open("dataset/SvmCascadeClassifier_results.json", 'r'))]
     self._cascades = test_dataset
Example #23
def make_tweet():
    """ This function will take in the information required to make a tweet
    and then append it to the tweet list, while checking for errors """

    author = input('What is your name? ')

    while True:
        message = input('What would you like to tweet? ')
        if len(message) > 140:
            print('Tweets can only be 140 characters!')
        else:
            tweet = tw.Tweet(author, message)
            tweet_list.append(tweet)
            print(tweet.get_author() + ', your tweet has been saved.')
            break
Example #24
    def getNextTweet(self):
        tweet = Tweet()
        line = self.f.readline()
        if line == "":
            self.f.close()
            return None
        try:
            o = json.loads(line)
        except json.decoder.JSONDecodeError:
            print("Problematic JSON string:")
            print(line)
            self.problems += 1
            return None

       
        # Extract GPS: try 'geo' tag, fallback to 'location' tag
        o.update({'lat': None, 'lon': None})
        if o['geo'] is not None:
            self.GPStweets += 1
            (tweet.lat, tweet.lon) = o['geo']['coordinates']
            (o['lat'], o['lon']) = o['geo']['coordinates']
        else:
            try:
                tweet.location = o['location']
                match = self.rexLatLon.search(tweet.location)
                if bool(match):
                    self.GPStweets += 1
                    (tweet.lat, tweet.lon) = float(match.group('lat')), float(match.group('lon'))
                    (o['lat'], o['lon']) = (tweet.lat, tweet.lon)
            except KeyError:
                print("Location Key Error")

        self.tweets += 1
        if self.tweets % 100000 == 0:
            print("Tweets so far: " + str(self.tweets))

        #tweet.WRONGuserID = o['from_user_id']
        tweet.userName = o['from_user']
        tweet.text = o['text'].encode("utf-8")
        tweet.createdAt = o['created_at']
        tweet.profile_image = o['profile_image_url']
        tweet.msgID = int(o['id'])
        tweet.json = line.strip()
        return (o, tweet, line)
Example #25
    def write_tweets(self):

        print "Getting tweets"

        searched_tweets = [
            status for status in tweepy.Cursor(self.twitter_api.search,
                                               q=self.stock_query).items(
                                                   self.max_limit)
        ]
        collected_tweets = {}
        for status in searched_tweets:

            tweet_id = status.id
            text = repr(status.text)
            retweet_count = status.retweet_count
            quotes_count = "--not implemented--"  # HOW to get this?
            favorites_count = status.favorite_count
            author = status.author
            is_news = "--not implemented--"  # DEFINE LIST OF VALID NEWS AUTHORS
            timestamp = status.created_at

            # Instantiate tweet object, which processes the info
            tweet = Tweet.Tweet(tweet_id=tweet_id,
                                text=text,
                                retweet_count=retweet_count,
                                quotes_count=quotes_count,
                                favorites_count=favorites_count,
                                author=author,
                                is_news=is_news,
                                timestamp=timestamp)

            # Set the csv path from the first timestamp we gather
            if self.csv_path == "":
                self.csv_path = str(self.stock_query[1:]) + str(
                    tweet.timestamp) + ".csv"
                print("Writing data to: " + self.csv_path)
                tweet.writeHeaderToCsv(self.csv_path)

            # Use a dict keyed by id to de-duplicate tweets
            if tweet.tweet_id not in collected_tweets:
                collected_tweets[tweet.tweet_id] = tweet

        # Write the processed info to the file at csv_path
        for collected in collected_tweets.values():
            collected.writeDataToCsv(self.csv_path)
Example #26
 def read_data(infile):
     data = Tweet.load_from_tsv(infile)
     X = []
     y = []
     for tweet in data:
         if tweet.label == 'neutral':
             continue
         text = tweet.raw_text.lower()
         ints = []
         for w in text.split(' '):
             if w in vmap:
                 ints.append(vmap[w])
         lv = label_map[tweet.label]
         X.append(ints)
         y.append(lv)
     X = pad_mask(X)
     y = np.asarray(y, dtype=np.int32)
     return X, y
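pad_mask is defined elsewhere; a common implementation right-pads the variable-length index lists and stacks a 0/1 validity mask alongside them. A hedged sketch, not the project's code:

import numpy as np

def pad_mask(seqs, max_len=None):
    # Hypothetical: returns an (n, max_len, 2) array whose first channel
    # holds the padded word ids and whose second holds the validity mask.
    max_len = max_len or max(len(s) for s in seqs)
    out = np.zeros((len(seqs), max_len, 2), dtype=np.int32)
    for i, s in enumerate(seqs):
        n = min(len(s), max_len)
        out[i, :n, 0] = s[:n]
        out[i, :n, 1] = 1
    return out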
Example #27
    def processTweetFile(self, jfile):
        tweetObjs = []
        tweets = io_geojson.processTweets(jfile)
        for t in tweets:
            tweetObjs.append(Tweet.Tweet(t))

        random.seed(1212)

        # create new map for new file
        self.map = folium.Map(location=phx_coords)
        self.map.zoom_start = 8

        for tw in tweetObjs:
            latitude, longitude = tw.getRandPointInBoundingBox()
            folium.Marker([latitude, longitude], popup=tw.twScreenName).add_to(self.map)

        self.map.save(self.mapFile)
        with open(self.mapFile, 'r') as f:
            self.webView.setHtml(f.read())
Example #28
    def transform_and_load(self, json_response, recreate_db):

        # inspect response line (optional)
        print("json printed: ",
              json.dumps(json_response, indent=4, sort_keys=True))

        # dismantle the fields
        tweet_id = json_response["data"]["id"]
        tweet_text = json_response["data"]["text"]
        tweet_lang = json_response["data"]["lang"]
        tweet_created_at = json_response["data"]["created_at"]
        tweet_place_id = json_response["includes"]["places"][0]["id"]
        tweet_place_geo_bbox = json_response["includes"]["places"][0]["geo"][
            "bbox"]
        tweet_place_full_name = json_response["includes"]["places"][0][
            "full_name"]
        tweet_place_type = json_response["includes"]["places"][0]["place_type"]
        tweet_country_code = json_response["includes"]["places"][0][
            "country_code"]
        stream_rule_tag = json_response["matching_rules"][0]["tag"]

        # construct tweet_data_dict
        tweet_data_dict = {
            'twitter_id': tweet_id,
            'text': tweet_text,
            'lang': tweet_lang,
            'created_at': tweet_created_at,
            'places_geo_place_id': tweet_place_id,
            'places_geo_bbox': tweet_place_geo_bbox,
            'places_full_name': tweet_place_full_name,
            'places_place_type': tweet_place_type,
            'places_country_code': tweet_country_code,
            'stream_rule_tag': stream_rule_tag
        }

        # construct a Tweet() object
        # data passed in to Tweet() has to be in a dictionary format
        single_tweet = Tweet(**tweet_data_dict)

        # inspect transformed Tweet() object
        print("single_tweet: ", single_tweet)

        # load data
        self.start_load(single_tweet, recreate_db)
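Reconstructed from the field accesses above, transform_and_load expects a filtered-stream payload with the geo expansion enabled, roughly of this shape (values invented):

json_response = {
    "data": {
        "id": "1",
        "text": "example tweet",
        "lang": "en",
        "created_at": "2021-01-01T00:00:00.000Z",
    },
    "includes": {
        "places": [{
            "id": "abc123",
            "geo": {"bbox": [-74.3, 40.5, -73.7, 40.9]},
            "full_name": "New York, NY",
            "place_type": "city",
            "country_code": "US",
        }]
    },
    "matching_rules": [{"tag": "nyc"}],
}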
Example #29
def append_next_set(max_tweet_id):
    user_timeline = twitter.get_user_timeline(user_id=TWITTER_USER_ID,
                                              count=200,
                                              max_id=max_tweet_id,
                                              trim_user=True,
                                              exclude_replies=True,
                                              include_rts=False)

    for item in user_timeline:
        tweet_id = item["id"]
        tweet_status = item["text"]
        tweet_date = item["created_at"]
        rt_count = item["retweet_count"]
        fav_count = item["favorite_count"]

        next_tweet = Tweet.Tweet(tweet_id, tweet_status, tweet_date, rt_count,
                                 fav_count)

        if len(ALL_TWEETS) == 0 or tweet_id != ALL_TWEETS[-1].tweet_id:
            ALL_TWEETS.append(next_tweet)
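A typical driver walks the timeline backwards by feeding the oldest id seen so far back in as max_id. A hedged sketch of such a loop (the stopping rule is an assumption, not part of the original):

max_id = None
while True:
    before = len(ALL_TWEETS)
    append_next_set(max_id)
    if len(ALL_TWEETS) == before:
        break  # the API returned nothing new
    max_id = ALL_TWEETS[-1].tweet_id - 1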
Example #30
def read_data_from_database():
    mydb = mysql.connector.connect(user="******",
                                   password="******",
                                   host="localhost",
                                   database="tweetermysql_2018-07-30")

    my_cursor = mydb.cursor()

    my_cursor.execute(
        ' select ID, MetaData, Date, Section from news_2017_01 where day(Date) = "1" limit 2'
    )

    my_result = my_cursor.fetchall()

    data = list()

    for item in my_result:
        u, m_data, date, section = item
        t = tweet.Tweet(u, section, m_data,
                        datetime.strptime(date, '%Y-%m-%d'))
        data.append(t)

    return data
Example #31
def create_tweet(tweets):
    # Ask for the user's name
    name = input("\nWhat is your name? ")

    while True:
        # Ask for the user's message
        text = input("What would you like to tweet? ")

        # If the tweet is too long, display an error and ask again
        if len(text) > 140:
            print("\nTweets can only be 140 characters!\n")
        else:
            # ...otherwise, the tweet is 140 characters or fewer, so stop looping
            break

    # Create a Tweet object using the user's name and message
    tweet = Tweet.Tweet(name, text)

    # Add the Tweet to the tweets list
    tweets.append(tweet)

    # Print a confirmation that the Tweet has been made
    print(name, ", your tweet has been saved.", sep="")
Example #32
def load_annotations(annotation_file, source_file, annotation_type):
    dom = minidom.parse(annotation_file)
    all_annotations = set()
    all_class_mentions = {}

    annotations = dom.getElementsByTagName("annotation")
    class_mentions = dom.getElementsByTagName("classMention")

    for cm in [cm for cm in class_mentions if cm.getElementsByTagName("mentionClass")[0].attributes["id"].value == annotation_type]:
        all_class_mentions[cm.attributes["id"].value] = cm.getElementsByTagName("mentionClass")[0].attributes["id"].value

    source_data = ""
    with open(source_file) as sf::
        for line in sf:
            source_data += line

    for a in annotations:
        ann = Tweet.Annotation()

        if len(a.getElementsByTagName("annotator")) > 0 and annotator_okay(a.getElementsByTagName("annotator")[0], annotation_file):
            if len(a.getElementsByTagName("mention")) > 0:
                if len(a.getElementsByTagName("span")) > 0:
                    if len(a.getElementsByTagName("spannedText")) > 0 and a.getElementsByTagName("spannedText")[0].firstChild != None:
                        ann.ann_id = a.getElementsByTagName("mention")[0].attributes["id"].value
                        ann.span_start = int(a.getElementsByTagName("span")[0].attributes["start"].value)
                        ann.span_end = int(a.getElementsByTagName("span")[0].attributes["end"].value)
                        ann.span_text = a.getElementsByTagName("spannedText")[0].firstChild.nodeValue
        if ann.ann_id != "":
            if not source_data[ann.span_start:ann.span_end] == ann.span_text:
                if ann.span_text in source_data[ann.span_start-5:ann.span_end+5]:
                    ann.span_start = ann.span_start-5+ (source_data[ann.span_start-5:ann.span_end+5].find(ann.span_text))
                    ann.span_end = ann.span_start + len(ann.span_text)
        if ann.ann_id in all_class_mentions:
            ann.entity_type += all_class_mentions[ann.ann_id] + " "
            all_annotations.add(ann)

    return all_annotations
Example #33
def load_dataset(train_file, val_file, test_file):
    ''' Load the semeval twitter data
        Use Kate's Tweet class
    '''

    class arbit:
        pass

    args = arbit()

    args.subtask_id = 'a'
    args.train_file = train_file
    args.dev_file = val_file
    args.test_file = test_file

    train, val, test = Tweet.load_datasets(args)

    X_train, y_train, vmap = preprocess(train)
    X_val, y_val, _ = preprocess(val, vmap)
    X_test, y_test, _ = preprocess(test, vmap)

    X_train, X_val, X_test = map(pad_mask, [X_train, X_val, X_test])
    y_train, y_val, y_test = map(np.asarray, [y_train, y_val, y_test])

    return X_train, y_train, X_val, y_val, X_test, vmap
Example #34
import telepot, time
from nltk.chat.iesha import iesha_chatbot
from tweep import Tweet

tweet_client = Tweet()
is_chatting = False

def handle(msg):
    global is_chatting
    global tweet_client
    chat_id = msg['chat']['id']
    command = msg['text']
    print('Got command: %s' % command)
    if command == '/hello' and not is_chatting:
        bot.sendMessage(chat_id, 'Hello, how are you?')
    elif command == '/timeline' and not is_chatting:
        bot.sendMessage(chat_id, '\n'.join(
            [message.text for message in tweet_client.handle.home_timeline()]))
    elif command.split('=')[0] == '/tweet' and not is_chatting:
        try:
            tweet_client.hitme(command.split('=')[1] + ' #mika')
            bot.sendMessage(chat_id, 'Your message tweeted successfully')
        except Exception:
            bot.sendMessage(chat_id, 'There is some problem tweeting! Try after some time')
    elif command == '/chat':
        is_chatting = True
        bot.sendMessage(chat_id, 'Hi I am Iesha. Who are You?')
    elif command == '/stopchat':
        is_chatting = False
        bot.sendMessage(chat_id, 'Bye Bye. take care!')
    elif not command.startswith('/') and is_chatting:
        bot.sendMessage(chat_id, iesha_chatbot.respond(command))

# Create a bot object with API key
bot = telepot.Bot('152871568:AAFRaZ6ibZQ52wXXXXXXXXXXXXXX')

# Attach a function to the notifyOnMessage callback
bot.notifyOnMessage(handle)

# Listen to the messages
while 1:
    time.sleep(10)
Example #35
            for cell in IWList(interface).getData().values():
                match = re.match(r"(\d+)/(\d+)", cell["Signal"])
                strength_nu = match.group(1)
                strength_de = match.group(2)
                strength = float(strength_nu) / float(strength_de)
                essid = cell["ESSID"]
                mac = cell["MAC"]

                if essid == "":
                    continue

                # Keep track of the strength of our primary network.
                if essid == "Westmont_Encrypted" and strength > strongest_signal:
                    strongest_signal = strength

                Tweet.bark(essid, json_data)

                # Write to db
                values = [
                          str(time.time()),
                          str(mac),
                          str(essid),
                          str(strength),
                          str(gpsdata["lat"]),
                          str(gpsdata["lon"]),
                          str(gpsdata["alt"])
                         ]

                cur = con.cursor()
                # Parameterized insert avoids quoting/injection issues
                # (placeholder style assumes the sqlite3 driver)
                cur.execute("INSERT INTO wifis VALUES (?,?,?,?,?,?,?)", values)
            con.commit()
Example #36
    def getNextTweet(self):
        tweet = Tweet()
        line = self.f.readline()
        if line == "":
        #    print "Missing line:" 
            print (self.tweets,file=sys.stderr) 
            self.f.close()
            return None
        try:
            o = json.loads(line)
        except json.decoder.JSONDecodeError as e:
            print("Problematic JSON string:", file=sys.stderr)
            print(line, file=sys.stderr)
            print(e.args, file=sys.stderr)
            self.problems += 1
            return None

       
        # Extract GPS: try 'geo' tag, fallback to 'location' tag
        try:
            if o['doc']['geo'] != None:
                (tweet.lat, tweet.lon) = o['doc']['geo']['coordinates']
                self.GPStweets += 1
            else:
                try:
                    tweet.location = o['doc']['location']
                    match = self.rexLatLon.search(tweet.location)
                    if bool(match):
                        self.GPStweets += 1
                        (tweet.lat, tweet.lon) = float(match.group('lat')), float(match.group('lon'))
                except KeyError:
                    print("Location Key Error", file=sys.stderr)
            self.tweets += 1
            if self.tweets % 100000 == 0:
                print("Tweets so far: " + str(self.tweets), file=sys.stderr)

            #Tweet.WRONGuserID = o['from_user_id']
            tweet.userName = o['doc']['from_user']
            if o['doc']['to_user_id'] != None:
                tweet.toUser = o['doc']['to_user']
            else:
                tweet.toUser = None
            tweet.text = o['doc']['text'].encode("utf-8")
            tweet.health = o['doc']['health']
            tweet.createdAt = o['doc']['created_at']
            tweet.profile_image = o['doc']['profile_image_url']
            tweet.msgID = int(o['doc']['id'])
            #print("333333333")
            #tweet.sentiment = float(o['doc']['sentiment'])
            #commenting negemo posemo (PAVAN)
            #tweet.negemo = float(o['doc']['negemo'])
            # tweet.sad = float(o['doc']['sad'])
            #tweet.posemo = float(o['doc']['posemo'])
            #tweet.anger = float(o['doc']['anger'])
            #tweet.friends = float(o['doc']['friends'])
            #print("44444444444")
            tweet.datetime = datetime.strptime(tweet.createdAt, "%a, %d %b %Y %H:%M:%S +0000") - timedelta(hours=4)
            return (tweet, line)
        except KeyError:
            print("KeyError:", file=sys.stderr)
            print(line, file=sys.stderr)
            print("TWEETS", file=sys.stderr)
            print(self.tweets, file=sys.stderr)
            return "Err"