def selectClassesLimit(self, limit):
    tweetsPositivos = []
    tweetsNegativos = []

    # Fetch positive tweets
    cur = self.__conn.cursor()
    sql = """SELECT id, tokens, original, classe, emojis
             FROM tweets
             WHERE classe='positive' AND tokens != ''
             LIMIT {0}"""
    cur.execute(sql.format(limit))
    rows = cur.fetchall()
    cur.close()
    for row in rows:
        tweetsPositivos.append(Tweet(row[0], row[1], row[2], row[3], row[4]))

    # Fetch negative tweets
    cur = self.__conn.cursor()
    sql = """SELECT id, tokens, original, classe, emojis
             FROM tweets
             WHERE classe='negative' AND tokens != ''
             LIMIT {0}"""
    cur.execute(sql.format(limit))
    rows = cur.fetchall()
    cur.close()
    for row in rows:
        tweetsNegativos.append(Tweet(row[0], row[1], row[2], row[3], row[4]))

    return tweetsPositivos, tweetsNegativos
def save_tweet(self, tweet_dict):
    """Saves one tweet object to ES in the index named tweet-index-<YEAR>-<#WEEK>."""
    INDEX_NAME = self.get_tweet_index()

    # Drop fields we do not want indexed.
    tweet_dict = self.purge_key_deep(tweet_dict, 'media')
    tweet_dict = self.purge_key_deep(tweet_dict, 'urls')
    tweet_dict = self.purge_key_deep(tweet_dict, 'url')

    # Parse Twitter's timestamp format into a datetime for ES.
    time_str = tweet_dict.get('created_at', '')
    if time_str:
        tweet_dict['created_at'] = datetime.datetime.strptime(time_str, "%a %b %d %H:%M:%S +0000 %Y")
    else:
        tweet_dict['created_at'] = None

    tweet_dict.update({u'_id': tweet_dict.get(u'id', "00000"), u'_index': INDEX_NAME})
    tweet = Tweet(**tweet_dict)
    res = tweet.save()
    self.tweet_per_follower_count += 1

    if res:
        self.logger.info(" Success: tweet #" +
                         str(self.tweet_per_follower_count) +
                         " for user_id: " + str(tweet_dict['user']['id']) +
                         " saved in index: " + INDEX_NAME +
                         " tweet_id: " + str(tweet_dict['id']))
    else:
        self.logger.error(" Failure: tweet #" +
                          str(self.tweet_per_follower_count) +
                          " for user_id: " + str(tweet_dict['user']['id']) +
                          " not saved in index: " + INDEX_NAME +
                          " tweet_id: " + str(tweet_dict['id']))
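# Hedged sketch: purge_key_deep is called above but not defined in this
# section. One plausible implementation removes a key from a dict and from
# any nested dicts/lists (an assumption, not the original helper):
def purge_key_deep(self, obj, key):
    if isinstance(obj, dict):
        obj.pop(key, None)              # drop the key at this level, if present
        for value in obj.values():
            self.purge_key_deep(value, key)
    elif isinstance(obj, list):
        for item in obj:
            self.purge_key_deep(item, key)
    return obj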
def run_numerai_batch():
    # Read properties (raw string so the backslashes are not treated as escapes)
    config = configparser.ConfigParser()
    config.read(r'C:\etc\properties.ini')

    # Numerai credentials for submission
    public = config['numerai']['public']
    secret = config['numerai']['secret']

    # Compute the tournament number from the current week of the year
    week = time.strftime("%U")
    contest = str(int(week) + 245)
    print("\n Numerai Contest..." + contest)

    # Work out the working directory and create it if it does not exist
    directory = 'F:\\Numerai\\numerai' + contest + '\\'
    first = "FALSE"
    if not os.path.exists(directory):
        first = "TRUE"
        os.makedirs(directory)

    # Download the dataset from numerai, save it, then unzip it
    nx.download(directory + 'numerai_dataset.zip')
    with ZipFile(directory + 'numerai_dataset.zip', 'r') as zipObj:
        zipObj.extractall(directory)

    # Run the XGBoost model on it
    rvalue = str(mechXg.main(contest))

    if first == "TRUE":
        # Tweet the submission announcement
        print("Tweeting ..")
        Tweet.tweetSomething('Happy New Year! I am uploading submission for numer.ai [' +
                             contest + '] with correlation [' + rvalue + '] ')

    # Upload the submission to numerai
    print("Uploading")
    names = ('nomi',)
    for name in names:
        nxj.upload(directory + name + '_submission.csv', name, public, secret)

    print("All Done")
def test_save_and_load_multiple_tweets(self):
    usr = TwitterUser('@POTUS', 'white house', 'president of the united states',
                      'June 2009', '14k', '120')
    twt1 = Tweet('Donald Trump', usr.username, usr.user_id,
                 '2020-02-01T06:33:23.000Z', 'Rocketman!', 120, 3929, '', 300,
                 'image url', 'URL')
    twt2 = Tweet('Donald Trump', usr.username, usr.user_id,
                 '2020-03-01T08:33:23.000Z', 'NO!', 120, 3929, '', 300,
                 'image url', 'URL')
    twt_list = TweetList('donny')
    twt_list.add_tweet(twt1)
    twt_list.add_tweet(twt2)
    DA = TweetDA()
    DA.save_list(twt_list)
    load_list = DA.load_all(username='******')
    self.assertEqual(twt_list, load_list)
def getNextTweet(self):
    tweet = Tweet()
    line = self.f.readline()
    if line == "":
        print self.tweets
        self.f.close()
        return None
    try:
        o = json.loads(line)
    except ValueError as e:  # json in Python 2 raises ValueError on bad input
        print "Problematic JSON string:"
        print line
        print e.args
        self.problems += 1
        return None

    # Extract GPS: try the 'geo' tag, fall back to the 'location' tag
    try:
        if o['doc']['geo'] is not None:
            (tweet.lat, tweet.lon) = o['doc']['geo']['coordinates']
            self.GPStweets += 1
        else:
            try:
                tweet.location = o['doc']['location']
                match = self.rexLatLon.search(tweet.location)
                if match:
                    self.GPStweets += 1
                    (tweet.lat, tweet.lon) = float(match.group('lat')), float(match.group('lon'))
            except KeyError:
                print "Location Key Error"

        self.tweets += 1
        if self.tweets % 100000 == 0:
            print "Tweets so far: " + str(self.tweets)

        tweet.userName = o['doc']['from_user']
        tweet.text = o['doc']['text'].encode("utf-8")
        tweet.createdAt = o['doc']['created_at']
        tweet.profile_image = o['doc']['profile_image_url']
        tweet.msgID = int(o['doc']['id'])
        tweet.datetime = datetime.strptime(tweet.createdAt, "%a, %d %b %Y %H:%M:%S +0000")
        return (tweet, line)
    except KeyError:
        print "KeyError:"
        print line
        print "TWEETS"
        print self.tweets
        return "Err"
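# Hedged sketch: self.rexLatLon is used above but defined elsewhere. A pattern
# with named 'lat'/'lon' groups along these lines would satisfy the
# match.group('lat') / match.group('lon') calls (an assumption, not the
# original pattern):
import re
rexLatLon = re.compile(r"(?P<lat>[-+]?\d{1,3}\.\d+)\s*,\s*(?P<lon>[-+]?\d{1,3}\.\d+)")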
def create_tweet_objects():
    tweets_json = open(sys.argv[1])
    tweet_list = []
    AFINN_DICT = create_afinn_dict()

    # Parse each JSON line and collect the non-derived attributes of a tweet
    # (text, datetime, language, etc.).
    for jtweet in tweets_json:
        ptweet = json.loads(jtweet)
        if 'text' not in ptweet:
            continue
        # Look for the trigger words 'sleep', 'sleeping', 'slept' and extract
        # the remaining metadata.
        word_list_raw = ptweet['text'].split()
        word_list = [word.strip('!@#$%^&*,?.').lower() for word in word_list_raw]
        if 'sleep' in word_list or 'sleeping' in word_list or 'slept' in word_list:
            try:
                t_coord = ptweet['coordinates']['coordinates']
            except (KeyError, TypeError):
                t_coord = None
            t_datetime = ptweet.get('created_at')
            t_lang = ptweet.get('lang')
            try:
                t_hash = ptweet['entities']['hashtags']
            except (KeyError, TypeError):
                t_hash = None

            # Create the tweet object, then compute its derived attributes
            tweet_object = Tweet(ptweet['text'], t_coord, t_datetime, t_lang, t_hash)
            tweet_object.calculate_raw_score(word_list, AFINN_DICT)
            tweet_object.calculate_phase(MOON_DICT_13, MOON_PHASE)
            tweet_list.append(tweet_object)
    return tweet_list
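# Hedged sketch: create_afinn_dict is referenced above but not shown. A common
# version reads the tab-separated AFINN-111 word list into a {word: score}
# dict (the file name/location is an assumption):
def create_afinn_dict(path='AFINN-111.txt'):
    afinn = {}
    with open(path) as f:
        for line in f:
            # rsplit keeps multi-word entries like "can't stand" intact
            word, score = line.rsplit('\t', 1)
            afinn[word] = int(score)
    return afinn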
def getTweets(username, nTweets):
    auth = OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    api = tweepy.API(auth)

    # Fetch the user's most recent nTweets tweets
    new_tweets = api.user_timeline(screen_name=username, count=nTweets)

    tweets = []
    tweetsCSV = [tweet.text for tweet in new_tweets]
    for x in tweetsCSV:
        tweets.append(str(emoji.demojize(x.encode('unicode_escape'))))
    dateCSV = [tweet.created_at for tweet in new_tweets]

    allData = {}  # Key = date, value = tweet (object from Tweet.py)
    for i in range(0, nTweets):
        try:
            text = re.sub(r'https:.*$', '', tweets[i])    # remove URLs
            text = re.sub(r'(\\[a-zA-Z0-9]+)', '', text)  # remove escaped emojis
            # print "Date: {} | Text: {} | Mentions: {} | Hashtags: {}".format(dateCSV[i], text, getMentions(text), getHashTags(text))  # for testing
            allData[dateCSV[i]] = Tweet.Tweet(text, getMentions(text), getHashTags(text))
        except IndexError:
            break  # fewer than nTweets were returned; we reached the end of this person's timeline
    return allData
def getTweets(api):
    tweets = set()
    for _ in range(180):
        thisIterTweets = makeTwitterRequest(api)
        # Convert to Tweet objects and merge into the set (deduplicating)
        thisIterTweets = [Tweet.Tweet(tweet) for tweet in thisIterTweets]
        tweets |= set(thisIterTweets)
    return tweets
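# Note: the set union above only deduplicates if Tweet is hashable. A hedged
# sketch of the methods the class would need (the assumption here is that
# identity is the underlying tweet id; the original class is not shown):
class Tweet(object):
    def __init__(self, status):
        self.status = status
        self.id = status.id  # assumed id attribute on the raw status

    def __eq__(self, other):
        return isinstance(other, Tweet) and self.id == other.id

    def __hash__(self):
        return hash(self.id)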
def load_tweets(self, path_dataset):
    entities = os.listdir(path_dataset)
    for entity in entities:
        path_file_entity = os.path.join(path_dataset, entity)
        tweets = []
        with open(path_file_entity, 'r') as tweets_file:
            for tweet in tweets_file:
                # Tab-separated fields: id, author, entity code, lang, timestamp, corpus
                info_tweet = tweet.split('\t')
                id_tweet = info_tweet[0]
                author = info_tweet[1]
                entity_code = info_tweet[2]
                lang = info_tweet[3]
                timestamp = info_tweet[4]
                corpus = info_tweet[5]
                tweets.append(tw.Tweet(id_tweet, author, entity_code, lang, timestamp, corpus))
        self.__tweets[entity] = tweets
def make_tweet(mytweets):
    # Get the user's name
    author = input('What is your name? ')
    # Get the tweet text
    text = input('What would you like to tweet? ')

    # Re-prompt until the tweet is at most 140 characters
    while len(text) > 140:
        print('Tweets can only be 140 characters')
        print()
        text = input('What would you like to tweet? ')

    # Only save once the text has passed the length check, so we never
    # write records with over-long tweets
    if len(text) <= 140:
        # Create a Tweet object for this entry
        tweet_entry = Tweet.Tweet(author, text)
        # Add tweet_entry to the beginning of the mytweets list
        mytweets.insert(0, tweet_entry)
        # Save the mytweets list to a file
        save_tweets(mytweets)
        print(author, ', your tweet has been saved.', sep='')
def getTweets(username):
    auth = OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    api = tweepy.API(auth)

    # Fetch the user's 20 most recent tweets (tweepy's default page size)
    new_tweets = api.user_timeline(id=username)

    tweets = []
    tweetsCSV = [tweet.text for tweet in new_tweets]
    for x in tweetsCSV:
        try:
            tweets.append(emojiPattern.remove_emoji(x))
        except Exception:
            continue  # skip tweets whose emoji stripping fails
    dateCSV = [tweet.created_at for tweet in new_tweets]

    allData = {}  # Key = date, value = tweet (object from Tweet.py)
    for i in range(len(tweetsCSV)):
        try:
            text = str(tweetsCSV[i])
            print "Text: {} | Mentions: {} | Hashtags: {}".format(text, getMentions(text), getHashTags(text))
            allData[dateCSV[i]] = Tweet.Tweet(text, getMentions(text), getHashTags(text))
        except Exception:
            continue
    return allData
def readXML(xmlFile):
    tree = ET.parse(xmlFile)
    root = tree.getroot()
    tweets = []
    for item in root.iter('tweet'):
        content = item.find('content').text
        sentiments = item.find('sentiments')
        polarity = sentiments[0].find('value').text
        polarity = polarityTagging(polarity)
        # polarity = polarityTagging3(polarity)
        # Other info:
        tweet_id = long(item.find('tweetid').text)
        user = item.find('user').text
        date = item.find('date').text
        lang = item.find('lang').text
        if content is not None:
            tweets.append(tw.Tweet(tweet_id, user, date, lang, content, polarity))
    return tweets
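# Hedged sketch: polarityTagging is called above but not defined here. Given
# TASS-style polarity values, it plausibly collapses them onto the label set
# used downstream; the exact mapping is an assumption, not the original:
def polarityTagging(polarity):
    mapping = {'N+': 0, 'N': 1, 'NEU': 2, 'P': 3, 'P+': 4}
    return mapping.get(polarity, mapping['NEU'])  # default to neutral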
def test_save_and_load_tweet(self):
    usr = TwitterUser('@POTUS', 'white house', 'president of the united states',
                      'June 2009', '14k', '120')
    twt = Tweet('Donald Trump', usr.username, usr.user_id,
                '2020-02-01T06:33:23.000Z', 'Rocketman!', 120, 3929, '', 300,
                'image url', 'URL')
    DA = TweetDA()
    DA.save_tweet(twt)
    load_twt = DA.load_tweet(twt.tweet_id)
    self.assertEqual(twt, load_twt)
def initWithFilteredData(self, data):
    self.id = data['id']
    self.name = data['name']
    for tweetID in data['tweets']:
        tweetData = data['tweets'][str(tweetID)]
        tweetObj = Tweet.Tweet(tweetData)
        self.tweets[tweetData['id']] = tweetObj
        self.tweets_count += 1
        self.urls_count += tweetObj.getAttr('urls_count')
def selectAllNoEmoji(self):
    cur = self.__conn.cursor()
    cur.execute("""SELECT id, tokens, original, classe, emojis
                   FROM tweets
                   WHERE tokens != '' AND classe='NoEmoji'""")
    rows = cur.fetchall()
    cur.close()
    tweets = []
    for row in rows:
        tweets.append(Tweet(row[0], row[1], row[2], row[3], row[4]))
    return tweets
def initWithRawData(self, data):
    self.id = data[0]['id']
    self.name = data[0]['name']
    # Skip the first three entries; the rest are tweets
    for i in range(len(data)):
        if i not in (0, 1, 2):
            tweetObj = Tweet.Tweet(data[i])
            self.tweets[data[i]['id']] = tweetObj
            self.tweets_count += 1
            self.urls_count += tweetObj.getAttr('urls_count')
def termometer():
    '''Return all hate occurrences within the 175 window, with their type (img || txt).'''
    if request.method == 'GET':
        tweets = getTweets()
        response = []
        for tweet in tweets:
            t = Tweet(tweet[0], tweet[1], tweet[2], tweet[3], tweet[4])
            response.append(t)
            print(t)
        return jsonify('nudity')
def _parse_csv(self, fname):
    # Helper that converts the CSV into a TweetList
    df = pd.read_csv(fname)
    tweets = TweetList('output')  # create a TweetList from this file
    for i, row in df.iterrows():
        twt = Tweet(row['UserScreenName'], row['UserName'], self.user.user_id,
                    row['Timestamp'], row['Text'], row['Likes'], row['Retweets'],
                    row['Emojis'], row['Comments'], row['Image link'],
                    row['Tweet URL'])
        tweets.add_tweet(twt)
    return tweets
def getNextTweet(self):
    tweet = Tweet()
    line = self.f.readline()
    if line == "":
        self.f.close()
        return None
    try:
        o = json.loads(line)
    except ValueError:  # json in Python 2 raises ValueError on bad input
        print "Problematic JSON string:"
        print line
        self.problems += 1
        return None

    # Extract GPS: try the 'geo' tag, fall back to the 'location' tag
    o.update({'lat': None, 'lon': None})
    if o['geo'] is not None:
        self.GPStweets += 1
        (tweet.lat, tweet.lon) = o['geo']['coordinates']
        (o['lat'], o['lon']) = o['geo']['coordinates']
    else:
        try:
            tweet.location = o['location']
            match = self.rexLatLon.search(tweet.location)
            if match:
                self.GPStweets += 1
                (tweet.lat, tweet.lon) = float(match.group('lat')), float(match.group('lon'))
                (o['lat'], o['lon']) = (tweet.lat, tweet.lon)
        except KeyError:
            print "Location Key Error"

    self.tweets += 1
    if self.tweets % 100000 == 0:
        print "Tweets so far: " + str(self.tweets)

    tweet.userName = o['from_user']
    tweet.text = o['text'].encode("utf-8")
    tweet.createdAt = o['created_at']
    tweet.profile_image = o['profile_image_url']
    tweet.msgID = int(o['id'])
    tweet.json = line.strip()
    return (o, tweet, line)
def test4(training_data, folds: int, portionOfDataSet: float, test_ratio: float):
    """This is with more tokens."""
    X = []       # training data
    y = []       # class labels: disaster or not disaster
    Tweets = []
    print("Using {} % of data set...".format(portionOfDataSet * 100))
    print("Parsing text...")
    for i in range(int(len(training_data.index) * portionOfDataSet)):
        # Build one Tweet per row: index row i rather than passing whole columns
        Tweets.append(Tweet.Tweet(training_data["statusid"].iloc[i],
                                  training_data["ttext"].iloc[i]))
def get_tweets(self):
    tweet_objects = self.response_json["data"]
    tweets = []
    for obj in tweet_objects:
        tweets.append(Tweet.Tweet(id=obj["id"],
                                  text=obj["text"],
                                  # TODO: decide whether to alter Tweet class
                                  author='hypefury'))
    return tweets
def __init__(self, event, queue, results):
    train_dataset, test_dataset = Tweet.get_flattened_data(
        'dataset/k4/training.json', 'dataset/k4/testing.json',
        'dataset/k4/root_tweet.json', 4)
    multiprocessing.Process.__init__(self)
    self._event = event
    self._queue = queue
    self._results = results
    self.daemon = True
    self._tag_count = 2
    # Precomputed results for each base classifier
    self._preprocesssed_results = [
        json.load(open("dataset/NaiveBayesCascadeClassifier_results.json", 'r')),
        json.load(open("dataset/RandomForestCascadeClassifier_results.json", 'r')),
        json.load(open("dataset/knnClassifier_results.json", 'r')),
        json.load(open("dataset/SvmCascadeClassifier_results.json", 'r')),
    ]
    self._cascades = test_dataset
def make_tweet():
    """
    Take in the information required to make a tweet and then append it to
    the tweet list, while checking for errors.
    """
    author = input('What is your name? ')
    while True:
        message = input('What would you like to tweet? ')
        if len(message) > 140:
            print('Tweets can only be 140 characters!')
        else:
            tweet = tw.Tweet(author, message)
            tweet_list.append(tweet)
            print(tweet.get_author() + ', your tweet has been saved.')
            break
def write_tweets(self):
    print "Getting tweets"
    searched_tweets = [
        status for status in tweepy.Cursor(self.twitter_api.search,
                                           q=self.stock_query).items(self.max_limit)
    ]
    collected_tweets = {}
    for status in searched_tweets:
        tweet_id = status.id
        text = repr(status.text)
        retweet_count = status.retweet_count
        quotes_count = "--not implemented--"    # HOW to get this?
        favorites_count = status.favorite_count
        author = status.author
        is_news = "--not implemented--"         # DEFINE LIST OF VALID NEWS AUTHORS
        timestamp = status.created_at

        # Instantiate the tweet object, which processes the info
        tweet = Tweet.Tweet(tweet_id=tweet_id,
                            text=text,
                            retweet_count=retweet_count,
                            quotes_count=quotes_count,
                            favorites_count=favorites_count,
                            author=author,
                            is_news=is_news,
                            timestamp=timestamp)

        # Set the path of the csv file from the first timestamp we gather
        if self.csv_path == "":
            self.csv_path = str(self.stock_query[1:]) + str(tweet.timestamp) + ".csv"
            print "Writing data to: " + self.csv_path
            tweet.writeHeaderToCsv(self.csv_path)

        # Hashmap to deduplicate tweets
        if tweet.tweet_id not in collected_tweets:
            collected_tweets[tweet.tweet_id] = tweet

    # Write the processed info to the file at csv_path
    for tweet_id in collected_tweets:
        collected_tweets[tweet_id].writeDataToCsv(self.csv_path)
def read_data(infile):
    data = Tweet.load_from_tsv(infile)
    X = []
    y = []
    for tweet in data:
        # Binary task: skip neutral tweets
        if tweet.label == 'neutral':
            continue
        text = tweet.raw_text.lower()
        # Map each word to its vocabulary index, dropping OOV words
        ints = [vmap[w] for w in text.split(' ') if w in vmap]
        lv = label_map[tweet.label]
        X.append(ints)
        y.append(lv)
    X = pad_mask(X)
    y = np.asarray(y, dtype=np.int32)
    return X, y
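# Hedged sketch: pad_mask is used above but not defined in this section. A
# typical implementation pads the variable-length token-id lists to a fixed
# length and carries a 0/1 mask alongside them (an assumption; the original
# may shape its output differently):
import numpy as np

def pad_mask(X, max_len=None):
    if max_len is None:
        max_len = max(len(x) for x in X)
    out = np.zeros((len(X), max_len, 2), dtype=np.int32)
    for i, x in enumerate(X):
        n = min(len(x), max_len)
        out[i, :n, 0] = x[:n]   # token ids
        out[i, :n, 1] = 1       # mask: 1 where real tokens, 0 where padding
    return out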
def processTweetFile(self, jfile):
    tweetObjs = []
    tweets = io_geojson.processTweets(jfile)
    for t in tweets:
        tweetObjs.append(Tweet.Tweet(t))

    random.seed(1212)

    # Create a new map for the new file
    self.map = folium.Map(location=phx_coords)
    self.map.zoom_start = 8

    for tw in tweetObjs:
        latitude, longitude = tw.getRandPointInBoundingBox()
        folium.Marker([latitude, longitude], popup=tw.twScreenName).add_to(self.map)

    self.map.save(self.mapFile)
    with open(self.mapFile, 'r') as f:
        self.webView.setHtml(f.read())
def transform_and_load(self, json_response, recreate_db):
    # Inspect the raw response (optional)
    print("json printed: ", json.dumps(json_response, indent=4, sort_keys=True))

    # Dismantle the fields
    tweet_id = json_response["data"]["id"]
    tweet_text = json_response["data"]["text"]
    tweet_lang = json_response["data"]["lang"]
    tweet_created_at = json_response["data"]["created_at"]
    place = json_response["includes"]["places"][0]
    tweet_place_id = place["id"]
    tweet_place_geo_bbox = place["geo"]["bbox"]
    tweet_place_full_name = place["full_name"]
    tweet_place_type = place["place_type"]
    tweet_country_code = place["country_code"]
    stream_rule_tag = json_response["matching_rules"][0]["tag"]

    # Construct tweet_data_dict
    tweet_data_dict = {
        'twitter_id': tweet_id,
        'text': tweet_text,
        'lang': tweet_lang,
        'created_at': tweet_created_at,
        'places_geo_place_id': tweet_place_id,
        'places_geo_bbox': tweet_place_geo_bbox,
        'places_full_name': tweet_place_full_name,
        'places_place_type': tweet_place_type,
        'places_country_code': tweet_country_code,
        'stream_rule_tag': stream_rule_tag,
    }

    # Construct a Tweet() object; its data has to be passed as a dictionary
    single_tweet = Tweet(**tweet_data_dict)

    # Inspect the transformed Tweet() object
    print("single_tweet: ", single_tweet)

    # Load the data
    self.start_load(single_tweet, recreate_db)
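# Hedged sketch: Tweet(**tweet_data_dict) above implies a model whose field
# names match the dictionary keys. A plain dataclass with those fields is one
# way to picture it (an assumption; the original is likely an ORM model):
from dataclasses import dataclass
from typing import List

@dataclass
class Tweet:
    twitter_id: str
    text: str
    lang: str
    created_at: str
    places_geo_place_id: str
    places_geo_bbox: List[float]
    places_full_name: str
    places_place_type: str
    places_country_code: str
    stream_rule_tag: str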
def append_next_set(max_tweet_id):
    user_timeline = twitter.get_user_timeline(user_id=TWITTER_USER_ID,
                                              count=200,
                                              max_id=max_tweet_id,
                                              trim_user=True,
                                              exclude_replies=True,
                                              include_rts=False)
    for item in user_timeline:
        tweet_id = item["id"]
        tweet_status = item["text"]
        tweet_date = item["created_at"]
        rt_count = item["retweet_count"]
        fav_count = item["favorite_count"]
        next_tweet = Tweet.Tweet(tweet_id, tweet_status, tweet_date, rt_count, fav_count)
        # max_id pages overlap by one tweet; skip it if we already have it
        if len(ALL_TWEETS) == 0 or tweet_id != ALL_TWEETS[-1].tweet_id:
            ALL_TWEETS.append(next_tweet)
def read_data_from_database():
    mydb = mysql.connector.connect(user="******",
                                   password="******",
                                   host="localhost",
                                   database="tweetermysql_2018-07-30")
    my_cursor = mydb.cursor()
    my_cursor.execute(
        'select ID, MetaData, Date, Section from news_2017_01 where day(Date) = "1" limit 2'
    )
    my_result = my_cursor.fetchall()
    data = list()
    for item in my_result:
        u, m_data, date, section = item
        t = tweet.Tweet(u, section, m_data, datetime.strptime(date, '%Y-%m-%d'))
        data.append(t)
    return data
def create_tweet(tweets):
    # Ask for the user's name
    name = input("\nWhat is your name? ")
    while True:
        # Ask for the user's message
        text = input("What would you like to tweet? ")
        # If the tweet is too long, display an error and ask again
        if len(text) > 140:
            print("\nTweets can only be 140 characters!\n")
        else:
            # ...otherwise the tweet fits in 140 characters, so stop looping
            break
    # Create a Tweet object using the user's name and message
    tweet = Tweet.Tweet(name, text)
    # Add the Tweet to the tweets list
    tweets.append(tweet)
    # Print a confirmation that the Tweet has been made
    print(name, ", your tweet has been saved.", sep="")
def load_annotations(annotation_file, source_file, annotation_type):
    dom = minidom.parse(annotation_file)
    all_annotations = set()
    all_class_mentions = {}
    annotations = dom.getElementsByTagName("annotation")
    class_mentions = dom.getElementsByTagName("classMention")

    # Keep only the class mentions of the requested annotation type
    for cm in class_mentions:
        mention_class = cm.getElementsByTagName("mentionClass")[0]
        if mention_class.attributes["id"].value == annotation_type:
            all_class_mentions[cm.attributes["id"].value] = mention_class.attributes["id"].value

    # Read the source document the annotation spans refer to
    source_data = ""
    with open(source_file) as sf:
        for line in sf:
            source_data += line

    for a in annotations:
        ann = Tweet.Annotation()
        if (len(a.getElementsByTagName("annotator")) > 0
                and annotator_okay(a.getElementsByTagName("annotator")[0], annotation_file)
                and len(a.getElementsByTagName("mention")) > 0
                and len(a.getElementsByTagName("span")) > 0
                and len(a.getElementsByTagName("spannedText")) > 0
                and a.getElementsByTagName("spannedText")[0].firstChild is not None):
            ann.ann_id = a.getElementsByTagName("mention")[0].attributes["id"].value
            ann.span_start = int(a.getElementsByTagName("span")[0].attributes["start"].value)
            ann.span_end = int(a.getElementsByTagName("span")[0].attributes["end"].value)
            ann.span_text = a.getElementsByTagName("spannedText")[0].firstChild.nodeValue.encode("utf-8")
        if ann.ann_id != "":
            # If the recorded span does not match the source text, try to
            # realign it within a +/-5 character window
            if not source_data[ann.span_start:ann.span_end] == ann.span_text:
                if ann.span_text in source_data[ann.span_start - 5:ann.span_end + 5]:
                    ann.span_start = ann.span_start - 5 + source_data[ann.span_start - 5:ann.span_end + 5].find(ann.span_text)
                    ann.span_end = ann.span_start + len(ann.span_text)
            if ann.ann_id in all_class_mentions:
                ann.entity_type += all_class_mentions[ann.ann_id] + " "
                all_annotations.add(ann)
    return all_annotations
def load_dataset(train_file, val_file, test_file):
    '''
    Load the SemEval twitter data
    Use Kate's Tweet class
    '''
    # Simple namespace standing in for CLI-style args
    class arbit:
        pass

    args = arbit()
    args.subtask_id = 'a'
    args.train_file = train_file
    args.dev_file = val_file
    args.test_file = test_file

    train, val, test = Tweet.load_datasets(args)

    X_train, y_train, vmap = preprocess(train)
    X_val, y_val, _ = preprocess(val, vmap)
    X_test, y_test, _ = preprocess(test, vmap)

    X_train, X_val, X_test = map(pad_mask, [X_train, X_val, X_test])
    y_train, y_val, y_test = map(np.asarray, [y_train, y_val, y_test])

    return X_train, y_train, X_val, y_val, X_test, vmap
import telepot, time
from nltk.chat.iesha import iesha_chatbot
from tweep import Tweet

tweet_client = Tweet()
is_chatting = False

def handle(msg):
    global is_chatting
    global tweet_client
    chat_id = msg['chat']['id']
    command = msg['text']

    print 'Got command: %s' % command

    if command == '/hello' and not is_chatting:
        bot.sendMessage(chat_id, 'Hello, how are you?')
    elif command == '/timeline' and not is_chatting:
        bot.sendMessage(chat_id, '\n'.join([message.text for message in tweet_client.handle.home_timeline()]))
    elif command.split('=')[0] == '/tweet' and not is_chatting:
        try:
            tweet_client.hitme(command.split('=')[1] + ' #mika')
            bot.sendMessage(chat_id, 'Your message tweeted successfully')
        except:
            bot.sendMessage(chat_id, 'There is some problem tweeting! Try after some time')
    elif command == '/chat':
        is_chatting = True
        bot.sendMessage(chat_id, 'Hi I am Iesha. Who are You?')
    elif command == '/stopchat':
        is_chatting = False
        bot.sendMessage(chat_id, 'Bye Bye. take care!')
    elif not command.startswith('/') and is_chatting:
        bot.sendMessage(chat_id, iesha_chatbot.respond(command))
    else:
        pass

# Create a bot object with the API key
bot = telepot.Bot('152871568:AAFRaZ6ibZQ52wXXXXXXXXXXXXXX')

# Attach a function to the notifyOnMessage callback
bot.notifyOnMessage(handle)

# Listen for messages
while 1:
    time.sleep(10)
for cell in IWList(interface).getData().values():
    # Signal arrives as "numerator/denominator"; convert it to a ratio
    match = re.match(r"(\d+)/(\d+)", cell["Signal"])
    strength_nu = match.group(1)
    strength_de = match.group(2)
    strength = float(strength_nu) / float(strength_de)
    essid = cell["ESSID"]
    mac = cell["MAC"]
    if essid == "":
        continue

    # Keep track of the strength of our primary network.
    if essid == "Westmont_Encrypted" and strength > strongest_signal:
        strongest_signal = strength

    Tweet.bark(essid, json_data)

    # Write to db
    values = [
        str(time.time()),
        str(mac),
        str(essid),
        str(strength),
        str(gpsdata["lat"]),
        str(gpsdata["lon"]),
        str(gpsdata["alt"]),
    ]
    cur = con.cursor()
    cur.execute("INSERT INTO wifis values('" + "','".join(values) + "')")
    con.commit()
def getNextTweet(self):
    tweet = Tweet()
    line = self.f.readline()
    if line == "":
        print(self.tweets, file=sys.stderr)
        self.f.close()
        return None
    try:
        o = json.loads(line)
    except json.decoder.JSONDecodeError as e:
        print("Problematic JSON string:", file=sys.stderr)
        print(line, file=sys.stderr)
        print(e.args, file=sys.stderr)
        self.problems += 1
        return None

    # Extract GPS: try the 'geo' tag, fall back to the 'location' tag
    try:
        if o['doc']['geo'] is not None:
            (tweet.lat, tweet.lon) = o['doc']['geo']['coordinates']
            self.GPStweets += 1
        else:
            try:
                tweet.location = o['doc']['location']
                match = self.rexLatLon.search(tweet.location)
                if match:
                    self.GPStweets += 1
                    (tweet.lat, tweet.lon) = float(match.group('lat')), float(match.group('lon'))
            except KeyError:
                print("Location Key Error", file=sys.stderr)

        self.tweets += 1
        if self.tweets % 100000 == 0:
            print("Tweets so far: " + str(self.tweets), file=sys.stderr)

        tweet.userName = o['doc']['from_user']
        if o['doc']['to_user_id'] is not None:
            tweet.toUser = o['doc']['to_user']
        else:
            tweet.toUser = None
        tweet.text = o['doc']['text'].encode("utf-8")
        tweet.health = o['doc']['health']
        tweet.createdAt = o['doc']['created_at']
        tweet.profile_image = o['doc']['profile_image_url']
        tweet.msgID = int(o['doc']['id'])
        # Sentiment/negemo/posemo/anger/friends fields are intentionally not
        # parsed here (commented out upstream)
        tweet.datetime = datetime.strptime(tweet.createdAt, "%a, %d %b %Y %H:%M:%S +0000") - timedelta(hours=4)
        return (tweet, line)
    except KeyError:
        print("KeyError:", file=sys.stderr)
        print(line, file=sys.stderr)
        print("TWEETS", file=sys.stderr)
        print(self.tweets, file=sys.stderr)
        return "Err"