def check_emojis(e1: EMOJI_CHECK, e2: EMOJI_CHECK) -> tuple[bool, Optional[str]]:
    """Validate the emojis the user put.

    :return: ``(True, None)`` when both inputs pass, otherwise
             ``(False, offending_input)`` for the first string that is
             not exactly one emoji.
    """
    for candidate in (e1, e2):
        # A string argument is only valid when it contains exactly one emoji.
        if isinstance(candidate, str) and emojis.count(candidate) != 1:
            return False, candidate
    return True, None
def check_emojis(e1: EMOJI_CHECK, e2: EMOJI_CHECK) -> Tuple[bool, Optional[str]]:
    """
    Validates the emojis the user put.

    :param e1: The first emoji.
    :param e2: The second emoji.
    :return: ``(True, None)`` on success, else ``(False, bad_input)`` for the
             first string argument that does not contain exactly one emoji.
    """
    offender = next(
        (e for e in (e1, e2) if isinstance(e, str) and emojis.count(e) != 1),
        None,
    )
    if offender is not None:
        return False, offender
    return True, None
def get_all_specific_features(Authors):
    """Collect per-author counts of emojis, first-person markers, pronouns
    and negations (English pipeline).

    :param Authors: iterable of author dicts, each with a 'tweets' list of strings.
    :return: one ``[emoji, first_person, pronouns, negations]`` list per author.
    """
    features = []
    for author in Authors:
        emoji = 0
        first_person = 0
        pronouns = 0
        negations = 0
        for tweet in author['tweets']:
            tokens = clean_en.tokenize(tweet)
            # Emojis are counted on the raw tweet; the tokenized form carries
            # the placeholder tags counted below.
            emoji += emojis.count(tweet)
            # Previously: len(re.findall(u'[\U0001f600-\U0001f650]', tweet)).
            # (The original kept that note as a bare triple-quoted string,
            # i.e. a dead expression statement — now a real comment.)
            first_person += tokens.count('<first_person>')
            pronouns += tokens.count('<pronoun>')
            negations += tokens.count('<negation>')
        features.append([emoji, first_person, pronouns, negations])
    return features
def emojiCount(file):
    """Print how many lines of *file* contain at least one emoji.

    :param file: path of a UTF-8 encoded text file.
    """
    count = 0
    # Bug fix: the original opened the file without ever closing it (and
    # shadowed the 'file' parameter with the handle); 'with' releases it.
    with open(file, 'r', encoding='utf-8') as handle:
        for line in handle:
            if emojis.count(line) > 0:
                count += 1
    print(count)
def emoticon_number_sd(author):
    """Return the standard deviation of the per-tweet emoji count for *author*.

    :param author: dict with a 'tweets' list of strings.
    """
    # (An earlier version matched the range U+1F600-U+1F650 with a regex.)
    per_tweet_counts = [emojis.count(tweet) for tweet in author['tweets']]
    return stats.stdev(per_tweet_counts)
def final_process(word):
    """Keep *word* when it is an emoji or mostly alphanumeric.

    :return: ``[word]`` for an emoji token; the alphanumeric fragments of
             *word* when the stripped form is longer than one character;
             the empty string otherwise.
    """
    # Emoji tokens pass through untouched, wrapped in a list.
    if emojis.count(word) > 0:
        return [word]
    stripped = re.sub(r'\W+', '', word)
    if len(stripped) > 1:
        # Replace non-word runs with spaces and split into fragments.
        return re.sub(r'\W+', ' ', word).split()
    return ''
def get_all_specific_features_label(Authors):
    """Collect per-author ``[emoji, negations]`` feature vectors (Spanish pipeline)."""
    print("specific es")
    features = []
    for index, author in enumerate(Authors):
        # Progress trace: one line per author.
        print(index)
        emoji = 0
        first_person = 0
        pronouns = 0
        negations = 0
        for tweet in author['tweets']:
            tokens = clean_es.tokenize(tweet)
            emoji += emojis.count(tweet)
            first_person += tokens.count('<first_person>')
            pronouns += tokens.count('<pronoun>')
            negations += tokens.count('<negation>')
        # first_person and pronouns are computed but deliberately excluded
        # from the feature vector (their appends were commented out upstream).
        features.append([emoji, negations])
    return features
def count_unique_emojis(text: str, unique=True):
    '''
    Count the emojis in *text*.

    https://emojis.readthedocs.io/en/latest/api.html#sample-code
    import emojis

    :param text: string to scan.
    :param unique: when True (default) count distinct emojis only, matching
                   this function's name. The original accepted this flag but
                   silently ignored it and always returned the total count.
    '''
    # Bug fix: forward the 'unique' flag instead of dropping it.
    return emojis.count(text, unique=unique)
def emoji_1(Authors):
    """Annotate each author dict in-place with its total emoji count
    (stored under the 'emoji' key).

    :param Authors: iterable of author dicts, each with a 'tweets' list.
    """
    for author in Authors:
        # Previously: len(re.findall(u'[\U0001f600-\U0001f650]', tweet)).
        # (That note was a bare triple-quoted string — a dead expression
        # statement — in the original; now a real comment.)
        author['emoji'] = sum(emojis.count(tweet) for tweet in author['tweets'])
def emoticon_ratio(author):
    """Return the fraction of the author's tweets containing at least one emoji.

    :param author: dict with a 'tweets' list of strings.
    :return: ratio in [0, 1]; 0.0 for an author with no tweets
             (the original raised ZeroDivisionError in that case).
    """
    tweets = author['tweets']
    if not tweets:
        return 0.0
    tweets_with_emoticon = sum(1 for tweet in tweets if emojis.count(tweet) > 0)
    return tweets_with_emoticon / len(tweets)
def emoji_2(Authors):
    """Return a list with each author's total emoji count across their tweets.

    :param Authors: iterable of author dicts, each with a 'tweets' list.
    """
    features = []
    for author in Authors:
        # Previously a regex over U+1F600-U+1F650; the dead string literal
        # documenting that in the original is now a real comment.
        features.append(sum(emojis.count(tweet) for tweet in author['tweets']))
    return features
def text(update, context):
    """Telegram text handler: broadcast pure-emoji messages on the "emojis"
    channel, everything else as "scrolly-text"."""
    text_received = update.message.text
    grapheme_total = len(list(grapheme.graphemes(text_received)))
    # Hack to check if all graphemes are emojis
    if emojis.count(text_received) == grapheme_total:
        print("emojis:", text_received)
        update.message.reply_text(f'uwu {text_received}')
        SSEFuckery.sse_broadcast("emojis", text_received)
    else:
        print("scrolly-text:", text_received)
        update.message.reply_text(f'auzi cica >{text_received}')
        SSEFuckery.sse_broadcast("scrolly-text", text_received)
async def radio_mode_message(message):
    """Listens to messages and deletes them if they're not text-only in a radio channel"""
    # Ignore the bot's own messages.
    if message.author == OMEGA.user:
        return
    # Check whether this channel is registered as a radio channel.
    cursor.execute("SELECT EXISTS(SELECT 1 FROM radio WHERE channel_id=?);",
                   [message.channel.id])
    # Delete the message when the channel is a radio channel AND the message
    # carries attachments, Unicode emojis, or matches any of the patterns
    # below (custom Discord emote markup, or anything URL-shaped).
    if cursor.fetchall()[0][0] and (message.attachments or emojis.count(message.content) or any(
            re.search(expression, message.content) for expression in [
                # custom Discord emote markup, e.g. <:name:1234>
                r"<:\w*:\d*>",
                # rough URL matcher: optional scheme and userinfo, host,
                # optional port, path, query string and fragment.
                r"(([\w]+:)?//)?(([\d\w]|%[a-fA-f\d]{2})+(:([\d\w]|%[a-fA-f\d]{2})+)?@)"
                r"?([\d\w][-\d\w]{0,253}[\d\w]\.)+[\w]{2,63}(:[\d]+)?"
                r"(/([-+_~.\d\w]|%[a-fA-f\d]{2})*)"
                r"*(\?(&?([-+_~.\d\w]|%[a-fA-f\d]{2})=?)*)?(#([-+_~.\d\w]|%[a-fA-f\d]{2})*)?",
            ])):
        await message.delete()
def process_tweets(tweet):
    '''
    Takes in a single tweet (string of text, e.g. the 'tweet_text' key)
    and returns a processed, tokenized version of the tweet (list)
    '''
    # both emoji encoders exclude different emojis, so figured why not use both for increased coverage
    tweet = emoji.emojize(emojis.encode(tweet))
    # remove @ tags (usernames)
    tweet = re.sub('@[A-Za-z0-9_]+', '', tweet)
    # remove commas from numbers (e.g. 1,000 -> 1000)
    tweet = re.sub('([0-9],[0-9])', lambda x: str(x.group(0)).replace(',', ''), tweet)
    # remove strange unicode encodings and use NLTK TweetTokenizer to tokenize
    tweet = [
        word.encode('ascii', 'ignore').decode('ascii')
        if emojis.count(word) == 0 else word
        for word in tt.tokenize(tweet)
    ]
    # remove nonalphanumeric words except for emojis.
    # Perf fix: the original evaluated final_process(word) twice per word
    # (once in the filter, once for the kept value); compute it once.
    tweet = [
        result for result in (final_process(word) for word in tweet)
        if result != ''
    ]
    return [item for sublist in tweet for item in sublist]
def create_tweet_features(tweets_data, run_countvec=False, run_tfidf=False):
    """Add exclamation-mark, length and emoji-count columns to *tweets_data*,
    then optionally add vocabulary text features.

    clean_twitter_text should be run before this function for the best results.

    :param tweets_data: DataFrame with a 'text' column (and 'text_dec' when a
                        vectorizer flag is set); mutated in place.
    :param run_countvec: apply CountVectorizer features over the project vocabulary.
    :param run_tfidf: apply TF-IDF features (only consulted when run_countvec is False).
    :return: the augmented DataFrame.
    """
    # Create feature for number of exclamation marks
    tweets_data['exc_count'] = tweets_data['text'].map(lambda x: x.count("!"))
    # Create a column for number of characters
    tweets_data['characters_nb'] = tweets_data.text.apply(len)
    # Add count for emojis
    tweets_data['emoji_count'] = tweets_data['text'].map(
        lambda x: emojis.count(x))
    # Add text features
    vocab_list = vocab.get_all_vocab()
    # Bug fix: when neither flag was set, the original raised NameError at the
    # return because tweets_data_vocab was never assigned. Default to the
    # (already augmented) input frame.
    tweets_data_vocab = tweets_data
    if run_countvec:
        tweets_data_vocab = apply_countvectorizer(tweets_data, 'text_dec',
                                                  vocab_def=vocab_list)
    elif run_tfidf:
        tweets_data_vocab = apply_tfidf(tweets_data, 'text_dec',
                                        vocab_def=vocab_list)
    return tweets_data_vocab
def count_emojis(text: str):
    '''
    Return the number of emojis found in *text*.

    import emojis
    '''
    return emojis.count(text)
# Collect tweets containing at least one emoji via the Twitter search API.
# NOTE(review): fragment — the search-loop body appears to continue past this
# excerpt (sentiment analysis / DataFrame assembly elsewhere); indentation
# reconstructed from a collapsed source — confirm against the original layout.
id_list = []  # tweet ids already seen, used to skip duplicates
for i in range(1):
    if i == 0:
        # Seed keywords from a fixed set of common emojis (as :alias: text),
        # encoded to actual emoji characters then split into tokens.
        emoji_list = ":smile: :joy: :heart: :disappointed: :sob: :sparkles: :relieved: :wink: :cry: :angry:"
        keywords = emojis.encode(emoji_list).split()
        #keywords = ["the","be","to","of","and","a","in","that","have","I"]
    else:
        # Unreachable with range(1); kept for experiments with sentiment words.
        keywords = [
            "good", "happy", "free", "great", "easy", "bad", "alone", "upset",
            "sad", "lost"
        ]
    #for tweet in tweets:
    for tweet in Cursor(api.search, q=["trump"], lang="en").items(10):
        # Keep only tweets that contain an emoji and were not seen before.
        if (emojis.count(tweet.text) != 0) and (tweet.id not in id_list):
            #basic information of tweets
            my_id = tweet.id
            id_list.append(tweet.id)
            date = tweet.created_at
            author = tweet.author.screen_name
            #many tweets don't have location information
            if tweet.place != None:
                location = tweet.place.name
            elif tweet.author.location != None:
                location = tweet.author.location
            else:
                location = tweet.user.location
            likes = tweet.favorite_count
            retweets = tweet.retweet_count
def has_permissions(user: discord.Member) -> bool:
    """True when the member holds the pepehack or bartender role."""
    privileged = [Constants.PEPEHACK_ROLE, Constants.BARTENDER_ROLE]
    return any(role in user.roles for role in privileged)


# Picks between two variants (male/female) depending on whether the member
# has the corresponding Discord role.
def gender(user, male, female):
    if Constants.FEMALE_ROLE in user.roles:
        return female
    return male


# Returns a formatted emote with the name emote_name as a string for a
# Discord message. If no emote with that name exists, returns ''.
def emote(emote_name: str) -> str:
    custom = discord.utils.get(Constants.GUILD.emojis, name=emote_name)
    if custom:
        return str(custom)
    # Fall back to a standard Unicode emoji with that alias, if one exists.
    encoded = emojis.encode(f':{emote_name}:')
    if emojis.count(encoded) > 0:
        return encoded
    return ''


# Returns the int user id from a user mention (an @-tag).
# Raises ValueError when the mention cannot be parsed as an integer.
def get_id(user_mention: str) -> int:
    for marker in ("<", ">", "@", "!", "&"):
        user_mention = user_mention.replace(marker, "")
    return int(user_mention)
def is_emoji(s):
    """Return the emoji count of *s* (truthy when it contains emojis), 0 on error.

    NOTE: despite the name this returns an int, not a bool; callers rely on
    truthiness, so the interface is preserved.
    """
    try:
        return emojis.count(s)
    except Exception:
        # Bug fix: the original bare 'except:' also swallowed SystemExit and
        # KeyboardInterrupt; narrow it to ordinary exceptions.
        return 0
# NOTE(review): fragment — this 'else:' continues an if/elif chain (tweet
# location fallbacks) whose start lies outside this excerpt; presumably the
# statements below sit inside the same per-tweet loop — confirm against the
# original layout.
else:
    location = tweet.user.location
likes = tweet.favorite_count
retweets = tweet.retweet_count
#use TextBlob to do sentiment analysis
clean_tweet = ' '.join(re.sub("(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)", " ", tweet.text).split())
analysis = TextBlob(clean_tweet)
polarity = analysis.polarity
subjectivity = analysis.subjectivity
#use emojis to do emoji process
emoji_all = emojis.get(tweet.text)
emoji_see = emojis.decode(" ".join(emoji_all))
emoji_num = emojis.count(tweet.text)
#previous tweet text data, use unicode to encode, also can .decode("unicode_escape")
tweet = tweet.text.encode("unicode_escape")
# Assemble one row per tweet and append it to the running DataFrame.
s = pd.Series({'id':my_id,'date':date,'author':author,'location':location,'likes':likes,'retweets':retweets,'polarity':polarity,'subjectivity':subjectivity,'emoji_all':emoji_all,'emoji_see':emoji_see,'emoji_num':emoji_num,'tweet':tweet})
df = df.append(s, ignore_index=True)
print(df.shape[0])
print(i)
# Throttle requests to stay under the API rate limit.
time.sleep(3)
df.to_excel('TweetData_emoji.xlsx', sheet_name='Sheet1')
# Excel is better, because we can see the emoji
# 测试Emoji表情符号包Emojis的功能 import emojis print(emojis.count('Python is fun 👍')) # 读取搜狗输入法的表情示例,验证搜狗输入法的表情代码,的确为Unicode,如U+0001F601 from collections import defaultdict file = open('data\sogouw.txt', 'w', encoding='utf-8-sig') with open('data\sogou.txt', 'r', encoding='utf-8-sig') as f: i = 0 for line in f: line = line.replace(' ', '\n') while i < len(line): file.write(line[i]) i += 1 file.close() frequencies = defaultdict(int) with open('data\sogouw.txt', 'r', encoding='utf-8-sig') as f: for line in f: line = line.replace('\n', '') a = line.encode('unicode-escape').decode('utf-8') print(line, a) a = a.replace('\\u', '').replace('\\U', '').replace('feff', '') # 计算Sogou Unicode的十进制值以划定范围 try: b = int(a, 16) except: print(a)
def EmojiCount(tweet):
    """Return the number of distinct emojis in *tweet* after running it
    through emojis.encode (which resolves :alias: tokens)."""
    encoded = emojis.encode(tweet)
    return emojis.count(encoded, unique=True)
def emoticon_number_avg(author):
    """Return the mean emoji count per tweet for *author*.

    :param author: dict with a 'tweets' list of strings.
    :return: average emojis per tweet; 0.0 for an author with no tweets
             (the original raised ZeroDivisionError in that case).
    """
    tweets = author['tweets']
    if not tweets:
        return 0.0
    # (An earlier version matched the range U+1F600-U+1F650 with a regex.)
    total = sum(emojis.count(tweet) for tweet in tweets)
    return total / len(tweets)