def preprocessing(train, test, dev):
    """Run the train, test and dev text series through one shared pipeline.

    Every series gets the same steps, in this order:

    1. ``#`` characters are removed.
    2. Each text is passed through ``em.decode``.
    3. The text is lowercased.
    4. Each text is replaced by the space-joined lemmas of ``nlp(text)``,
       leaving out lemmas equal to ``"-PRON-"``.
    5. Every space-separated word containing ``@`` becomes ``"someone"``.

    Returns:
        The processed ``(train, test, dev)`` tuple, in that order.
    """
    print("Preprocessing ....")

    def lemmatize(text):
        # Keep every lemma except the "-PRON-" placeholder.
        lemmas = (token.lemma_ for token in nlp(text))
        return " ".join(lemma for lemma in lemmas if lemma != "-PRON-")

    def mask_mentions(text):
        # Splitting on a single space (not on any whitespace) is deliberate:
        # it keeps the original spacing intact when the words are re-joined.
        words = text.split(" ")
        return " ".join("someone" if "@" in word else word for word in words)

    def clean(series):
        series = series.str.replace("#", "")
        series = series.map(em.decode)
        series = series.str.lower()
        series = series.map(lemmatize)
        return series.map(mask_mentions)

    return (clean(train), clean(test), clean(dev))
def clean_twitter_text(tweets_data):
    """Clean the ``text`` column of a tweets DataFrame and add derived columns.

    The frame is modified in place and also returned.

    Steps applied to ``tweets_data['text']``, in order: lowercase, drop
    commas and full stops, strip surrounding whitespace, drop the ``…``
    character, drop ``http...`` links, turn newlines into spaces, drop
    ``$...`` tokens and drop ``@...`` tokens.

    Two columns are then added:

    * ``Datetime_eastern`` - the index converted to ``US/Eastern`` (the index
      must therefore support ``tz_convert``).
    * ``text_dec`` - the cleaned text passed through ``emojis.decode``, with
      a space added on each side of every ``:alias:`` made of lowercase
      letters so the aliases can be split off as separate words.

    Args:
        tweets_data: DataFrame with a ``text`` column.

    Returns:
        The same DataFrame object, after modification.
    """
    text = tweets_data['text'].str.lower()
    # regex=False is spelled out on purpose: '.' is a regex wildcard, and the
    # default value of ``regex`` differs between pandas versions. Being
    # explicit guarantees that only literal commas / full stops are removed.
    text = text.str.replace(',', '', regex=False)
    text = text.str.replace('.', '', regex=False)
    text = text.str.strip()
    # Remove the single-character ellipsis
    text = text.str.replace(r'…', '', regex=True)

    # Pattern -> replacement, applied in this order:
    # hyperlinks, newlines, stock tags, user tags.
    substitutions = (
        (r'http\S+', ''),
        (r'\n', ' '),
        (r'\$\S+', ''),
        (r'\@\S+', ''),
    )
    for pattern, replacement in substitutions:
        text = text.replace(pattern, replacement, regex=True)
    tweets_data['text'] = text

    # Add eastern standard time column
    tweets_data['Datetime_eastern'] = tweets_data.index.tz_convert(
        'US/Eastern')

    # Decoded version of the text, with emoji aliases separated by spaces
    decoded = tweets_data['text'].map(lambda x: emojis.decode(x))
    tweets_data['text_dec'] = decoded.replace(r'(:[a-z]+:)',
                                              ' \\1 ',
                                              regex=True)

    return tweets_data
Esempio n. 3
0
def count_tweet_emoticons(tweets):
    """Count emoji usage per category over a collection of tweets.

    Tweets whose text contains no emoji (per ``emojis.get``) are ignored.
    Every other tweet is assigned to ``CATEGORY_HATE`` when its
    ``"hate_speech"`` entry is truthy, otherwise to ``CATEGORY_NON_HATE``,
    and each emoji returned by ``emojis.get`` adds one to that category's
    counter under the key ``emojis.decode(emoji)``.

    Returns:
        A dict with four entries: the two category keys, each mapped to a
        list of ``(decoded_emoji, count)`` tuples sorted by count and then by
        name, both descending; plus ``'hate_total'`` and ``'non_hate_total'``,
        the number of emoji-bearing tweets in each category.
    """
    categories = (CATEGORY_HATE, CATEGORY_NON_HATE)
    counts = {category: {} for category in categories}
    tweet_totals = {category: 0 for category in categories}

    for tweet in tweets:
        found = emojis.get(util.get_tweet_text(tweet))
        if not found:
            # skip tweet if no emojis present
            continue
        category = CATEGORY_HATE if tweet["hate_speech"] else CATEGORY_NON_HATE
        tweet_totals[category] += 1
        bucket = counts[category]
        for symbol in found:
            key = emojis.decode(symbol)
            bucket[key] = bucket.get(key, 0) + 1

    def count_then_name(entry):
        name, count = entry
        return (count, name)

    results = {
        category: sorted(counts[category].items(),
                         key=count_then_name,
                         reverse=True)
        for category in categories
    }
    results['hate_total'] = tweet_totals[CATEGORY_HATE]
    results['non_hate_total'] = tweet_totals[CATEGORY_NON_HATE]
    return results
Esempio n. 4
0
 def get_media_type(self, message):
     """Describe the media attached to ``message``.

     Returns:
         ``(None, None)`` when the message has no media. Otherwise a
         ``(label, downloaded)`` tuple where ``label`` is ``"Photo"``, a
         ``"... Sticker"`` / ``"Document (...)"`` description, or the raw
         ``"_"`` type name for any other media kind, and ``downloaded`` is
         ``" (saved)"`` if the message id is listed in the selected chat's
         ``"downloads"`` entry, else ``""``.
     """
     view = self.main_view
     if not message.media:
         return (None, None)

     label = message.media.to_dict()["_"]
     if label == "MessageMediaPhoto":
         label = "Photo"
     elif label == "MessageMediaDocument":
         attributes = message.media.document.attributes

         def of_kind(kind):
             # All document attributes whose "_" tag equals ``kind``.
             return [att for att in attributes if att.to_dict()["_"] == kind]

         name_atts = of_kind("DocumentAttributeFilename")
         sticker_atts = of_kind("DocumentAttributeSticker")
         if sticker_atts:
             # A sticker is labelled by its alt text, optionally decoded.
             alt = sticker_atts[0].alt
             if view.text_emojis:
                 alt = emojis.decode(alt)
             label = f"{alt}  Sticker"
         elif name_atts:
             file_name = name_atts[0].to_dict()["file_name"]
             label = f"Document ({file_name})"
         else:
             # No file name available: fall back to the MIME type.
             label = f"Document ({message.media.document.mime_type})"

     saved_ids = view.dialogs[view.selected_chat]["downloads"]
     downloaded = " (saved)" if message.id in saved_ids else ""
     return (label, downloaded)
Esempio n. 5
0
async def countCurrency(*dates):
    """Tally how often each author mentioned any of ``currencies``.

    Up to 500 messages are read from the channel whose id is stored in the
    ``CHANNEL1`` environment variable. When ``dates`` is given, ``dates[0]``
    is used as the ``before`` bound and ``dates[1]`` as the ``after`` bound.

    Each message is passed through ``emojis.decode``; for every entry of
    ``currencies`` found in the decoded text, its number of occurrences is
    added to the author's total.

    Returns:
        A dict mapping author name to total count, ordered from the highest
        total to the lowest.
    """
    chan = await client.fetch_channel(os.getenv("CHANNEL1"))
    if dates:
        history = chan.history(limit=500, before=dates[0], after=dates[1])
    else:
        history = chan.history(limit=500)
    messages = await history.flatten()

    totals = {}
    for msg in messages:
        decoded = emojis.decode(msg.content)
        for currency in currencies:
            if currency not in decoded:
                continue
            occurrences = decoded.count(currency)
            author = msg.author.name
            totals[author] = totals.get(author, 0) + occurrences

    # Highest total first; authors with equal totals keep first-seen order.
    ranked = sorted(totals.items(), key=lambda pair: pair[1], reverse=True)
    return dict(ranked)
def df_emojis(df: pd.DataFrame, n=5):
    """Print and plot the most used emojis, overall and per author.

    Reads the ``"emojis"`` and ``"author"`` entries of every row of ``df``.
    Prints the ``n`` most frequent emojis overall, draws a bar chart (via
    ``bar``) of the ``2 * n`` most frequent ones labelled with their
    ``emojis.decode`` form, then prints the top ``n`` for every author
    returned by ``df_get_author_list(df)``.

    Args:
        df: frame with ``"emojis"`` (iterable of emojis, may be empty) and
            ``"author"`` columns.
        n: how many emojis to print per ranking.
    """
    print("EMOJI ANALYSIS")

    def ranked(counter):
        # Highest count first; equal counts keep their first-seen order.
        ordered = sorted(counter.items(),
                         key=lambda item: item[1],
                         reverse=True)
        return dict(ordered)

    def print_top(counter):
        # Print at most ``n`` entries of an already ranked counter.
        for position, (symbol, count) in enumerate(counter.items(), start=1):
            if position > n:
                break
            print(symbol, "--", count)

    per_author = {author: {} for author in df_get_author_list(df)}
    overall = {}

    for _, record in df.iterrows():
        emoji_list = record["emojis"]
        author = record["author"]
        if not emoji_list:
            continue
        for emoji in emoji_list:
            own = per_author[author]
            own[emoji] = own.get(emoji, 0) + 1
            overall[emoji] = overall.get(emoji, 0) + 1

    overall = ranked(overall)
    print("OVERALL:")
    print_top(overall)

    charted = list(overall.keys())[:(n * 2)]
    bar([emojis.decode(k) for k in charted],
        [overall[k] for k in charted],
        "Emojis",
        "Number of times used",
        rotation='')

    for author in per_author:
        per_author[author] = ranked(per_author[author])
        print(author)
        print_top(per_author[author])
Esempio n. 7
0
    def draw_chats(self):
        """Draw the chat list inside a frame.

        Draws a frame, then for ``self.chats_num`` dialogs starting at
        ``main_view.selected_chat_offset`` draws two text rows each: a header
        row (online marker, chat name, optional index, unread count, date)
        and a preview row (sender name and latest message text). Rows of
        consecutive chats start three lines apart.

        Any exception raised while drawing is caught and reported through
        ``show_stacktrace()``.
        """
        # Position of the selected chat relative to the first visible one.
        selected_chat_index = self.main_view.selected_chat - self.main_view.selected_chat_offset
        offset = self.main_view.selected_chat_offset
        try:
            self.draw_frame(0, 0, self.chats_height, self.chats_width)
            index = 0
            y = 1
            for index in range(self.chats_num):
                dialog = self.main_view.dialogs[index + offset]
                # Use the first entry of the "messages" list when the dialog
                # has that key, otherwise the message attached to the dialog.
                message = dialog["messages"][
                    0] if "messages" in dialog else dialog["dialog"].message
                message_string = message.text if message.text else "[Non-text object]"
                if self.main_view.text_emojis:
                    message_string = emojis.decode(message_string)
                chat_name = get_display_name(dialog["dialog"].entity)
                from_string = get_display_name(message.sender)
                unread = dialog["unread_count"]
                unread_string = f"({unread} new)" if unread else ""
                date = dialog["dialog"].date
                date = date.astimezone()
                date_string = self._datestring(date)
                # NOTE(review): ``pinned`` is computed but not used below.
                pinned = "* " if dialog["dialog"].pinned else "  "
                selected = selected_chat_index == index

                # Header row. Note on precedence: the ``attributes`` argument
                # of the chat name evaluates as
                # ``(primary | A_STANDOUT) if selected else A_BOLD``.
                # NOTE(review): confirm that unselected chats are meant to
                # lose the primary colour.
                self.draw_text([
                    self.format("o" if dialog["online"] else " ",
                                attributes=self.main_view.colors["secondary"]),
                    self.format(
                        chat_name,
                        attributes=self.main_view.colors["primary"]
                        | curses.A_STANDOUT if selected else curses.A_BOLD,
                        width=int(0.5 * self.chats_width)),
                    self.format(f" {str(index)} " if self.show_indices else "",
                                attributes=self.main_view.colors["standout"]),
                    self.format(unread_string,
                                attributes=self.main_view.colors["error"],
                                alignment="right"),
                    self.format(date_string,
                                alignment="right",
                                attributes=self.main_view.colors["primary"]),
                ],
                               y,
                               2,
                               maxwidth=self.chats_width - 2)
                # Preview row: "<sender>:" followed by the message text, which
                # gets the width left over after the sender prefix.
                self.draw_text([
                    self.format(f"{from_string}:"),
                    self.format(
                        message_string,
                        width=self.chats_width - len(f"{from_string}: ") - 3)
                ],
                               y + 1,
                               2,
                               maxwidth=self.chats_width - 2)
                y += 3
                # This increment does not affect iteration: the ``for`` loop
                # rebinds ``index`` from ``range`` on every pass.
                index += 1
        except Exception:
            show_stacktrace()
Esempio n. 8
0
def convert_emojis2names(top_emojis_list):
    """Map each emoji to the first ``:name:`` found in its decoded form.

    Each item is passed through ``emojis.decode`` and the text between the
    first pair of colons is taken as its name. Items whose decoded form has
    no such pair get the name ``'black_small_square'``.

    Returns:
        A list of names, one per input item, in input order.
    """
    names_list = []
    for emoji in top_emojis_list:
        matches = re.findall(':(.*?):', emojis.decode(emoji))
        # 1 manual exception: fall back to a fixed name when nothing matched
        names_list.append(matches[0] if matches else 'black_small_square')
    return names_list
Esempio n. 9
0
 async def _handle_key(self, key):
     if self.main_view.command_box:
         try:
             n = int(self.main_view.command_box)
         except:
             return
         self.main_view.edit_message = self.main_view.dialogs[self.main_view.selected_chat]["messages"][n]
         self.main_view.mode = "edit"
         self.main_view.inputs = emojis.decode(self.main_view.edit_message.text)
         self.main_view.inputs_cursor = len(self.main_view.inputs)
         self.main_view.command_box = ""
Esempio n. 10
0
def checkemoji(incoming_msg):
    """Return a short English sentence describing the emoji in a message.

    The message is passed through ``emojis.decode``. If the result has no
    ``:``, or if stripping the colons does not leave an alias known to
    ``emojis.db.get_emoji_by_alias`` (plain text containing a colon, several
    emojis, emoji mixed with text), ``"Ok"`` is returned.

    Otherwise the alias, with underscores turned into spaces, is wrapped in a
    sentence chosen by these rules, first match wins:

    * category ``'Flags'``           -> ``"This is <name>"``
    * first word ends in ``s``       -> ``"These are the <name>"``
    * ``'men'`` appears in the name  -> ``"These are <name>"``
    * first letter is one of a/e/i/o -> ``"This is an <name>"``
    * anything else                  -> ``"This is a <name>"``
    """
    decoded = emojis.decode(incoming_msg)
    if ':' not in decoded:
        return "Ok"

    alias = decoded.replace(':', '')
    emoji_info = emojis.db.get_emoji_by_alias(alias)
    if emoji_info is None:
        # Unknown alias: previously this crashed with AttributeError on
        # ``None.category``. Answer as for a message without emoji.
        return "Ok"

    name = alias.replace('_', ' ')
    words = name.split()
    if not words:
        return "Ok"
    first_word = words[0]

    if emoji_info.category == 'Flags':
        return "This is " + name
    if first_word[-1] == 's':
        return "These are the " + name
    if 'men' in name:
        return "These are " + name
    if first_word[0] in "aeio":
        return "This is an " + name
    return "This is a " + name
def process_tweet(tweet):
    """Clean, tokenize and stem a single tweet.

    Steps, in order: drop a leading ``RT`` marker, pass the text through
    ``emojis.decode``, remove hyperlinks, ``#`` characters, underscores,
    digits and ``@...`` tokens, replace every remaining character that is
    neither a word character nor whitespace with a space, tokenize with
    ``TweetTokenizer`` (case folded, handles stripped, repeated characters
    shortened), drop tokens found in ``string.punctuation`` and stem the rest
    with ``PorterStemmer``.

    Args:
        tweet: raw tweet text.

    Returns:
        The list of stemmed tokens.
    """
    # All patterns are raw strings: sequences such as ``\.`` and ``\s`` are
    # invalid escapes in ordinary string literals and trigger warnings on
    # current Python versions.

    # remove old style retweet text "RT"
    new_tweet = re.sub(r'^RT[\s]+', '', tweet)

    # decode emojis to text descriptions
    new_tweet = emojis.decode(new_tweet)

    # remove hyperlinks
    new_tweet = re.sub(
        r'((www\.[^\s]+)|(https?://[^\s]+)|(http?://[^\s]+))', '', new_tweet)
    new_tweet = re.sub(r'http\S+', '', new_tweet)

    # remove hashtags (only the '#' sign, the word itself is kept)
    new_tweet = re.sub(r'#', '', new_tweet)

    # remove underscores
    new_tweet = re.sub(r'_', '', new_tweet)

    # remove all numbers
    new_tweet = re.sub(r'[0-9]', '', new_tweet)

    # remove usernames
    new_tweet = re.sub(r'@[^\s]+', '', new_tweet)

    # remove punctuation even in the middle of a string "in.the.middle"
    new_tweet = re.sub(r'[^\w\s]', ' ', new_tweet)

    tokenizer = TweetTokenizer(preserve_case=False,
                               strip_handles=True,
                               reduce_len=True)
    stemmer = PorterStemmer()

    # ``word not in string.punctuation`` is a substring test, so it also
    # drops the empty string and any run of characters that appears
    # verbatim inside ``string.punctuation``.
    return [
        stemmer.stem(word)
        for word in tokenizer.tokenize(new_tweet)
        if word not in string.punctuation
    ]
Esempio n. 12
0
def translate_comment(col):
    """Translate every comment of a collection into English.

    First sets ``Comment_English`` to the placeholder ``'none'`` on all
    documents of ``mydb[col]``. Then, for each document, passes its
    ``comment_text`` through ``emojis.decode``, translates it with
    ``translate_client`` (target language ``en``) and stores the translated
    text in that same document's ``Comment_English`` field.

    Args:
        col: name of the collection inside ``mydb``.
    """
    print('开始%s翻译' % col)
    collection = mydb[col]
    collection.update_many({}, {'$set': {'Comment_English': 'none'}})
    # The projection keeps ``_id`` (returned by default) and ``comment_text``.
    comment_array = collection.find({}, {'comment_text': 1})
    print('已获取所有评论,准备开始翻译')
    for i in comment_array:
        ch_comment = emojis.decode(i['comment_text'])
        result = translate_client.translate(ch_comment, target_language='en')
        # Address the document by its own _id. The former filter
        # {"Comment_English": 'none'} updated "whichever document still holds
        # the placeholder", which is not guaranteed to be the one that was
        # just translated.
        collection.update_one(
            {'_id': i['_id']},
            {'$set': {
                'Comment_English': result['translatedText']
            }})
Esempio n. 13
0
def extract_emoji(text: str) -> list:
    """Return every Unicode emoji found in ``text``, in order of appearance.

    The text is first passed through ``emojis.decode`` so that emojis appear
    as ``:alias:``. Every colon-delimited candidate is then checked against
    the emoji database; only known aliases are kept and converted back with
    ``emojis.encode``. Ordinary text that merely sits between two colons
    (for example ``:not an emoji:``) is therefore left out.
    """
    candidates = re.findall(r"(:[^:]*:)", emojis.decode(text))
    return [
        emojis.encode(candidate)
        for candidate in candidates
        # candidate[1:-1] is the alias without its surrounding colons
        if emojis.db.get_emoji_by_alias(candidate[1:-1]) is not None
    ]
Esempio n. 14
0
def emojiget(string):
    """List the names of all emojis contained in a message string.

    Two kinds are collected, in this order:

    1. Custom emojis written as ``<:name:id>`` or ``<a:name:id>``; their
       ``name`` part is used.
    2. Every emoji yielded by ``emojis.iter(string)``; its ``emojis.decode``
       form with the colons removed is used.

    Args:
        string: the message text.

    Returns:
        A list of emoji names; custom emojis first.
    """
    # The id is matched as 15-20 digits rather than exactly 18, so ids of
    # other lengths are not silently skipped.
    # NOTE(review): assumes Discord ids are snowflakes of varying digit
    # count - confirm against the Discord API reference.
    custom_matches = re.findall(r"<(a?):(\w*):(\d{15,20})>", string)
    emojilist = [name for _animated, name, _emoji_id in custom_matches]

    emojilist.extend(
        emojis.decode(emoji).replace(":", "") for emoji in emojis.iter(string))

    return emojilist
Esempio n. 15
0
    async def setemoji(self, ctx, emoji):
        """Lets a user define the emoji the user have to react with
        in order to let the bot quote it

        Usage:
        !setemoji {emoji}"""

        # Values starting with "<" are stored untouched; anything else is
        # stored in its emojis.decode form.
        is_custom = emoji.startswith("<")
        if not is_custom:
            emoji = emojis.decode(emoji)

        settings = dict(guild_id=ctx.guild.id, emoji=emoji)
        update_settings("Settings", settings)

        try:
            await ctx.send(f"New reaction emoji successfully set to {emoji}")
        except discord.Forbidden:
            # The setting is already saved; failing to confirm is not fatal.
            pass

        return 0
Esempio n. 16
0
    async def OnAddQuip(self, ctx, type: QuipType, *quip):
        """Adds a quip the bot can respond with when mentioned 

            **string|int:** <type>
           The type of quip you want to have.
           Available results (not case sensitive):
           - 0 (Regular)
           - 1 (Guild Emoji)
           - 2 (Specific User)
           - regular (Regular)
           - r (Regular)
           - emoji (Guild Emoji)
           - e (Guild Emoji)
           - user (Specific User)
           - u (Specific User)

           !! If Specific User Selected !!
           **discord.User:** <user>
           The discord user you want this quip to be specific to

           **string:** <quip>
           The quip you want to add.
        """
        # --- argument validation; the order of the checks decides which
        # error the caller sees ---
        if type == QuipType.INVALID:
            raise InvalidQuipType(type)

        for_user = type == QuipType.SPECIFIC_USER
        if for_user:
            if not quip:
                # Not even the user argument was supplied.
                raise commands.errors.MissingRequiredArgument(
                    inspect.Parameter('user',
                                      inspect.Parameter.POSITIONAL_ONLY))
            if len(quip) < 2:
                # A user was supplied but no quip text follows it.
                raise EmptyQuip()
        elif not quip:
            raise EmptyQuip()

        if botSettings.guild is None:
            raise InvalidGuild()

        combinedQuip = emojis.decode(' '.join(quip))

        if type == QuipType.GUILD_EMOJI:
            known = discord.utils.get(botSettings.guild.emojis,
                                      name=combinedQuip)
            if not known:
                raise InvalidGuildEmoji(combinedQuip)

        user = None
        additionalInfo = ' '
        if for_user:
            # The first word names the user; the remaining words are the quip.
            user = await commands.UserConverter().convert(ctx, quip[0])
            additionalInfo = '[{}] '.format(user.mention)
            quip = quip[1:]
            # Rebuilt from the raw words: this text is not passed through
            # emojis.decode, unlike the other quip types.
            combinedQuip = ' '.join(quip)

        botSettings.AddQuip(combinedQuip, type.value, user)

        message = '[{}]{}Quip added `{}`'.format(type.name, additionalInfo,
                                                 combinedQuip)

        await SendMessage(ctx, description=message, color=discord.Color.blue())
Esempio n. 17
0
 def demojify(self, text):
     """Replace emojis in every element of ``text`` by their spelled-out names.

     Each element is passed through ``emojis.decode``; the colons and
     underscores of the result are then turned into spaces.

     Args:
         text: object exposing ``apply`` (for example a pandas Series of
             strings).

     Returns:
         The transformed object produced by ``text.apply``. The visible code
         previously computed this value and dropped it, so callers always
         received ``None``.
     """
     return text.apply(
         lambda x: emojis.decode(x).replace(':', ' ').replace('_', ' '))
Esempio n. 18
0
 def remove_emoji(self, src_str) -> str:
     """Return ``src_str`` with its emojis removed.

     The string is passed through ``emojis.decode`` so emojis appear as
     ``:alias:``, and every such alias token is then deleted.

     The pattern matches one alias at a time (letters, digits, ``_``, ``+``
     and ``-`` between two colons). The former greedy ``":.*:"`` deleted
     everything from the first to the last colon, including ordinary text
     lying between two emojis.
     """
     decode_str = emojis.decode(src_str)
     return re.sub(r":[A-Za-z0-9_+\-]+:", "", decode_str)
Esempio n. 19
0
# Collect per-emoji statistics from emoji_1.txt (one tweet per line).
#
# For every emoji of a line that is also in ``save``, ``cont[emoji]`` holds
#   [decoded alias, total occurrences, [polarity, ...], [subjectivity, ...],
#    [position, ...]]
# with one polarity / subjectivity / position entry per line containing the
# emoji. ``position`` is the mean of the first and last occurrence offsets,
# each divided by the line length.
#
# The file is opened in a ``with`` block so the handle is closed when the
# loop ends, and the regex is a raw string because ``\w``, ``\/`` and ``\S``
# are invalid escapes in an ordinary literal.
with open("emoji_1.txt", encoding='utf-16') as tweet_file:
    for line in tweet_file:
        # Blank out @handles, URLs and every character that is not an ASCII
        # letter, digit, space or tab, then collapse runs of whitespace.
        clean_tweet = ' '.join(
            re.sub(r"(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)", " ",
                   line).split())
        analysis = TextBlob(clean_tweet)
        polarity = round(analysis.polarity, 5)
        subjectivity = round(analysis.subjectivity, 5)
        for i in emojis.get(line):
            if i not in save:
                continue
            num = line.count(i)
            position = round(
                (line.find(i) / len(line) + line.rfind(i) / len(line)) / 2, 5)

            if i not in cont:
                cont[i] = [
                    emojis.decode(i), num, [polarity], [subjectivity],
                    [position]
                ]
            else:
                cont[i][1] = cont[i][1] + num
                cont[i][2].append(polarity)
                cont[i][3].append(subjectivity)
                cont[i][4].append(position)
print(1)
# Second pass, over emoji_2.txt: each line is cleaned (handles, URLs and
# non-alphanumeric characters blanked out, whitespace collapsed) and wrapped
# in a TextBlob.
# NOTE(review): the loop body appears to be cut off after this point in the
# visible source; ``analysis`` is not used further here.
# NOTE(review): the file object returned by open() is never closed
# explicitly.
for line in open("emoji_2.txt", encoding='utf-16'):
    clean_tweet = ' '.join(
        re.sub("(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)", " ",
               line).split())
    analysis = TextBlob(clean_tweet)
Esempio n. 20
0
 def decode_emojis(text: str):
     '''
     Return ``text`` after passing it through ``emojis.decode``.

     Requires the ``emojis`` package to be imported; see
     https://emojis.readthedocs.io/en/latest/api.html#sample-code

     The visible code previously stored the decoded text in a local variable
     and never returned it.
     '''
     emoj = emojis.decode(text)
     return emoj
Esempio n. 21
0
    def draw_message(self, main_view, chat_idx):
        """Render message ``chat_idx`` of the selected chat as display lines.

        The message text is split on newlines, optionally passed through
        ``emojis.decode`` (when ``main_view.text_emojis`` is set) and
        hard-wrapped to the chat column width. A media description from
        ``get_media_type`` is appended and wrapped the same way.

        The result starts with a header line (index, sender, optional
        "via" forwarder, date), followed by the body lines and, for replies,
        a marker ``rNN`` naming the index of the replied-to message in the
        loaded messages (`` r?? `` when it is not among them). Outgoing
        messages are right-aligned; incoming ones are indented by four
        spaces.

        Returns:
            ``(lines, message)``.
        """
        messages = main_view.dialogs[main_view.selected_chat]["messages"]
        message = messages[chat_idx]
        maxtextwidth = int(self.single_chat_fraction * self.W) - 2

        def wrap(line):
            # Cut ``line`` into consecutive slices of maxtextwidth chars.
            pieces = int(math.ceil(len(line) / maxtextwidth))
            return [
                line[maxtextwidth * k:maxtextwidth * (k + 1)]
                for k in range(pieces)
            ]

        lines = []
        if message.text:
            for raw_line in message.text.split("\n"):
                shown = raw_line
                if main_view.text_emojis:
                    shown = emojis.decode(shown)
                # An empty line must survive as one empty display line.
                lines.extend([""] if shown == "" else wrap(shown))
        if message.media:
            media_type, downloaded = self.get_media_type(message)
            lines.extend(wrap(f"[{media_type}]{downloaded}"))

        reply = ""
        if message.is_reply:
            reply_id = message.reply_to_msg_id
            # First loaded message with the replied-to id, else a placeholder.
            reply = next(
                (f"r{position:02d}"
                 for position, candidate in enumerate(messages)
                 if candidate.id == reply_id),
                " r?? ",
            )

        if message.out:
            from_user = "******"
        else:
            from_user = get_display_name(message.sender)
        via_user = ""
        if message.forward:
            via_user = f"   via   {get_display_name(message.forward.sender)}"
        user_string = f"{from_user}{via_user}   "
        header = f"{chat_idx}   {user_string}{self._datestring(message.date.astimezone())}"

        if message.out:
            out = [header.rjust(maxtextwidth)]
            out.extend(text.rjust(maxtextwidth - 4) for text in lines)
        else:
            out = [header]
            out.extend("    " + text for text in lines)
        if message.is_reply:
            out.append(reply)
        return (out, message)
Esempio n. 22
0
    def draw_message(self, main_view, message, chat_idx):
        """Render ``message`` as a list of display lines.

        The message text is split on newlines, optionally passed through
        ``emojis.decode`` (when ``main_view.text_emojis`` is set) and
        hard-wrapped to the chat column width. Attached media adds one
        unwrapped ``[...]`` line: ``Photo``, ``Document (<file name or MIME
        type>)`` or the raw media type name.

        The result starts with a header line (``chat_idx``, sender, optional
        "via" forwarder, date), followed by the body lines and, for replies,
        a marker ``rNN`` naming the index of the replied-to message in the
        selected chat's loaded messages (`` r?? `` when it is not among
        them). Outgoing messages are right-aligned; incoming ones are
        indented by four spaces.

        Returns:
            ``(lines, message)``.
        """
        maxtextwidth = int(self.single_chat_fraction * self.W) - 2

        lines = []
        if message.text:
            for raw_line in message.text.split("\n"):
                shown = raw_line
                if main_view.text_emojis:
                    shown = emojis.decode(shown)
                if shown == "":
                    # An empty line must survive as one empty display line.
                    lines.append("")
                    continue
                pieces = int(math.ceil(len(shown) / maxtextwidth))
                lines.extend(shown[maxtextwidth * k:maxtextwidth * (k + 1)]
                             for k in range(pieces))

        if message.media:
            label = message.media.to_dict()["_"]
            if label == "MessageMediaPhoto":
                label = "Photo"
            elif label == "MessageMediaDocument":
                name_atts = [
                    att for att in message.media.document.attributes
                    if att.to_dict()["_"] == "DocumentAttributeFilename"
                ]
                if name_atts:
                    file_name = name_atts[0].to_dict()["file_name"]
                    label = f"Document ({file_name})"
                else:
                    # No file name available: fall back to the MIME type.
                    label = f"Document ({message.media.document.mime_type})"
            lines.append(f"[{label}]")

        reply = ""
        if message.is_reply:
            reply_id = message.reply_to_msg_id
            loaded = main_view.dialogs[main_view.selected_chat]["messages"]
            # First loaded message with the replied-to id, else a placeholder.
            reply = next(
                (f"r{position:02d}"
                 for position, candidate in enumerate(loaded)
                 if candidate.id == reply_id),
                " r?? ",
            )

        if message.out:
            from_user = "******"
        else:
            from_user = get_display_name(message.sender)
        via_user = ""
        if message.forward:
            via_user = f"   via   {get_display_name(message.forward.sender)}"
        user_string = f"{from_user}{via_user}   "
        header = f"{chat_idx}   {user_string}{self._datestring(message.date.astimezone())}"

        if message.out:
            out = [header.rjust(maxtextwidth)]
            out.extend(text.rjust(maxtextwidth - 4) for text in lines)
        else:
            out = [header]
            out.extend("    " + text for text in lines)
        if message.is_reply:
            out.append(reply)
        return (out, message)
Esempio n. 23
0
def unicodeEmojis(listaEmojis):
    """Return the ``emojis.decode`` form of every item of ``listaEmojis``.

    The items are iterated directly instead of being fetched by position, so
    any iterable is accepted (lists and tuples as before, but also sets,
    generators, or a pandas Series whose index is not 0..n-1).

    Args:
        listaEmojis: iterable of strings to decode.

    Returns:
        A new list with one decoded string per input item, in iteration
        order.
    """
    return [emojis.decode(item) for item in listaEmojis]
Esempio n. 24
0
    def draw_chats(self):
        """Draw the chat list inside a frame, skipping archived dialogs.

        Draws a frame, then walks the dialogs starting at
        ``main_view.selected_chat_offset`` and draws two text rows for each
        non-archived one: a header row (online marker, chat name, optional
        index, unread count, date) and a preview row (sender name and latest
        message text). A horizontal rule is drawn before the dialog at
        position ``main_view.num_pinned - offset`` unless that position is 0.

        Any exception raised while drawing is caught and reported through
        ``show_stacktrace()``.
        """
        # Position of the selected chat relative to the first visible one.
        selected_chat_index = self.main_view.selected_chat - self.main_view.selected_chat_offset
        offset = self.main_view.selected_chat_offset
        try:
            self.draw_frame(0, 0, self.chats_height + 1, self.chats_width)
            index = 0
            y = 1
            # Upper bound of the loop; grows by one for each archived dialog
            # skipped so that the number of drawn chats stays the same.
            chats_to_draw = self.chats_num
            while index < chats_to_draw:
                # only draw if messages are pinned and pins are viewable (at top)
                if index != 0 and index == self.main_view.num_pinned - offset:
                    self.draw_text([
                        self.format("─" * (self.chats_width // 2 - 1),
                                    alignment="center"),
                    ],
                                   y,
                                   1,
                                   maxwidth=self.chats_width - 2)
                    y += 2
                dialog = self.main_view.dialogs[index + offset]
                if dialog["dialog"].archived:
                    index += 1
                    chats_to_draw += 1
                    continue
                # Use the first entry of the "messages" list when it is not
                # empty, otherwise the message attached to the dialog.
                message = dialog["messages"][0] if len(
                    dialog["messages"]) > 0 else dialog["dialog"].message
                message_string = message.text if message.text else "[Non-text object]"
                if self.main_view.text_emojis:
                    message_string = emojis.decode(message_string)
                chat_name = get_display_name(dialog["dialog"].entity)
                if self.main_view.text_emojis:
                    chat_name = emojis.decode(chat_name)

                from_string = get_display_name(message.sender)
                unread = dialog["unread_count"]
                unread_string = f"({unread} new)" if unread else ""
                date = dialog["dialog"].date
                date = date.astimezone()
                date_string = self._datestring(date)
                # NOTE(review): ``pinned`` is computed but not used below.
                pinned = "* " if dialog["dialog"].pinned else "  "
                selected = selected_chat_index == index
                # Header row. Note on precedence: the ``attributes`` argument
                # of the chat name evaluates as
                # ``(primary | A_STANDOUT) if selected else A_BOLD``.
                # NOTE(review): confirm that unselected chats are meant to
                # lose the primary colour.
                self.draw_text([
                    self.format("o" if dialog["online"] else " ",
                                attributes=self.main_view.colors["secondary"]),
                    self.format(
                        chat_name,
                        attributes=self.main_view.colors["primary"]
                        | curses.A_STANDOUT if selected else curses.A_BOLD,
                        width=int(0.5 * self.chats_width)),
                    self.format(f" {str(index)} " if self.show_indices else "",
                                attributes=self.main_view.colors["standout"]),
                    self.format(unread_string,
                                attributes=self.main_view.colors["error"],
                                alignment="right"),
                    self.format(date_string,
                                alignment="right",
                                attributes=self.main_view.colors["primary"]),
                ],
                               y,
                               2,
                               maxwidth=self.chats_width - 2)
                # The ``=`` specifier inside the f-string needs Python 3.8+.
                debug(f"{self.chats_width=}")
                # Preview row: the sender prefix is capped at half the chat
                # column width and the message text gets what is left.
                self.draw_text([
                    self.format(f"{from_string}:",
                                width=min(self.chats_width // 2,
                                          len(from_string) + 1)),
                    self.format(message_string,
                                width=self.chats_width -
                                min(self.chats_width // 2,
                                    len(f"{from_string}: ") + 1) - 3)
                ],
                               y + 1,
                               2,
                               maxwidth=self.chats_width - 2)
                y += 3
                index += 1
        except Exception:
            show_stacktrace()
Esempio n. 25
0
            location = tweet.place.name
        elif tweet.author.location != None:
            location = tweet.author.location
        else:
            location = tweet.user.location
        likes = tweet.favorite_count
        retweets = tweet.retweet_count
        

        clean_tweet = ' '.join(re.sub("(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)", " ", tweet.text).split())
        analysis = TextBlob(clean_tweet)
        polarity = analysis.polarity
        subjectivity = analysis.subjectivity

        emoji_all = emojis.get(tweet.text)
        emoji_see = emojis.decode(str(emoji_all))
        emoji_num = emojis.count(tweet.text)

        tweet = tweet.text.encode("unicode_escape")



        s = pd.Series({'id':my_id,'date':date,'author':author,'location':location,'likes':likes,'retweets':retweets,'polarity':polarity,'subjectivity':subjectivity,'emoji_all':emoji_all,'emoji_see':emoji_see,'emoji_num':emoji_num,'tweet':tweet})
        df = df.append(s, ignore_index=True)
        

    df.to_csv('foo1.csv')
    

    #print(df.head(5))
    
Esempio n. 26
0
# XPath prefix selecting review cards; a 1-based positional predicate is
# appended to address a single card.
_REVIEW_CARD_XPATH = "//div[contains(@class,'_2wrUUKlw _3hFEdNs8')]"

_INSERT_REVIEW_SQL = "REPLACE INTO reviews (Name, City, Rating, Review, Hometown, Date_of_stay, Trip_type) VALUES (%s, %s, %s, %s, %s, %s, %s)"


def _review_card_span(position, span_xpath):
    """Return the span matching *span_xpath* inside review card *position* (1-based)."""
    return driver.find_element_by_xpath(
        _REVIEW_CARD_XPATH + "[" + str(position) + "]" + span_xpath)


def _review_card_text(position, span_xpath, label=None):
    """Return a span's text from a review card, or '' when it cannot be read.

    When *label* is given, that prefix text (e.g. 'Data del soggiorno:') is
    removed from the result.
    """
    try:
        text = _review_card_span(position, span_xpath).text
    except Exception:
        # Optional field: a missing element must not abort the whole page.
        return ''
    return text if label is None else text.replace(label, '')


def _review_card_rating(position):
    """Return the rating digit of a review card as an int, or '' if unavailable."""
    try:
        rating_class = _review_card_span(
            position, "//span[contains(@class, 'ui_bubble')]"
        ).get_attribute("class")
        # The score is taken from the second-to-last character of the class
        # attribute (presumably something like "... bubble_50" — verify
        # against the live page markup).
        return int(rating_class[len(rating_class) - 2])
    except Exception:
        return ''


def _store_reviews_on_current_page(hotel_name, city):
    """Expand the reviews on the current page and write each one to the DB."""
    # Expand truncated review bodies ("Scopri di più" = "Read more").
    info_plus = driver.find_element_by_xpath(
        "//div[contains(@class,'XUVJZtom')]//span[contains(text(),'Scopri di pi')]"
    )
    info_plus.click()
    time.sleep(seconds)
    all_reviews = driver.find_elements_by_xpath(
        "//q[contains(@class, 'IRsGHoPm')]")
    for position, review_element in enumerate(all_reviews, start=1):
        review = emojis.decode(review_element.text)
        time.sleep(seconds)

        rating = _review_card_rating(position)
        hometown = _review_card_text(
            position, "//span[contains(@class,'default _3J15flPT small')]")
        date = _review_card_text(
            position, "//span[contains(@class, '_34Xs-BQm')]",
            label='Data del soggiorno:')
        triptype = _review_card_text(
            position, "//span[contains(@class, '_2bVY3aT5')]",
            label='Tipo di viaggio:')

        records_to_insert = [(hotel_name, city, rating, review,
                              hometown, date, triptype)]
        cursor.executemany(_INSERT_REVIEW_SQL, records_to_insert)
        connection.commit()
    print(cursor.rowcount, "record in Reviews")


def reviews():
    """Scrape every review page of the currently open hotel into the DB.

    Reads the hotel name from the page heading, jumps to the review section,
    switches the language filter to "Tutte le lingue" (all languages) and
    then walks the paginated review list, storing one row per review.
    ``args.pr``, when truthy, overrides the number of pages to visit.

    Scraping is best-effort: any error after the hotel name has been read
    stops the walk and is reported on stdout instead of being raised.
    Relies on the module-level ``driver``, ``cursor``, ``connection``,
    ``args`` and ``seconds``.
    """
    hotel_name = driver.find_element_by_xpath("//h1[@id='HEADING']").text
    try:
        go_review = driver.find_element_by_xpath(
            "//span[contains(@class, '_33O9dg0j')]")
        go_review.click()  # jump down to the reviews section
        city = str(args.place)
        driver.find_element_by_xpath(
            "//span[contains(text(),'Tutte le lingue')]").click()
        time.sleep(seconds)
        number_pages = driver.find_element_by_xpath(
            "//a[contains(@class, 'pageNum')][position() = last()]").text
        pages_review = int(number_pages)

        if args.pr:
            pages_review = args.pr

        for page in range(pages_review):
            is_last_page = page >= pages_review - 1
            # Locate the "Avanti" (next) button before touching the page so
            # a missing button aborts the walk before anything is stored
            # for this page, exactly as before.
            go_on = None
            if not is_last_page:
                go_on = driver.find_element_by_xpath(
                    "//a[contains(text(),'Avanti')]")

            _store_reviews_on_current_page(hotel_name, city)

            if go_on is not None:
                go_on.click()
                time.sleep(seconds)
    except Exception as exc:
        # Keep the original best-effort contract (never raise from here),
        # but make the failure visible instead of silently discarding it.
        print("reviews(): scraping stopped early:", repr(exc))
Esempio n. 27
0
    async def handle_key(self, key, redraw = True):
        """Process one key press according to the current input mode.

        ``key`` is either a single character or a symbolic key name such as
        ``"RETURN"``, ``"ESCAPE"``, ``"BACKSPACE"`` or ``"RESIZE"``.  The
        handler dispatches on ``self.mode`` ("search", "vimmode", "normal",
        "popup", "edit", "insert"), clears ``self.command_box`` afterwards
        and, when ``redraw`` is true, redraws the screen.

        ``redraw=False`` is used when replaying a macro so the screen is not
        repainted for every replayed key.
        """
        # Any key dismisses a popup message and restores the previous mode.
        if self.mode == "popupmessage":
            self.mode = self.modestack.pop()
        if not self.ready:
            return
        if key == "RESIZE":
            await self.drawtool.resize()
            return
        # While recording, remember every key except the "q" that stops it.
        if self.macro_recording:
            if key != "q":
                self.macro_sequence.append(key)
        if self.mode == "search":
            if key == "ESCAPE" or key == "RETURN":
                self.mode = "normal"
            elif key == "BACKSPACE":
                if self.search_box == "":
                    self.mode = "normal"
                else:
                    self.search_box = self.search_box[0:-1]
                    self.search_chats()
                    self.search_next()
            else:
                self.search_box += key
                self.search_chats()
                self.search_next()
        elif self.mode == "vimmode":
            if key == "ESCAPE":
                self.mode = "normal"
            elif key == "RETURN":
                await self.call_command()
                self.vimline_box = ""
                self.mode = "normal"
            elif key == "BACKSPACE":
                if self.vimline_box == "":
                    self.mode = "normal"
                else:
                    self.vimline_box = self.vimline_box[0:-1]
            else:
                self.vimline_box += key
        elif self.mode == "normal":
            # Digits accumulate in command_box as a numeric prefix for the
            # d/e/r/m commands below.
            try:
                num = int(key)
            except ValueError:
                num = None
            if num is not None:
                self.command_box += str(num)
                await self.drawtool.redraw()
                return
            elif key == ":":
                self.mode = "vimmode"
                self.vimline_box = ""
            elif key == "RETURN" or key == "y":
                await self.send_message()
            elif key == "Q":
                await self.quit()
            elif key == "q":
                if self.macro_recording is None:
                    # start macro recording
                    async def record_macro(self, key):
                        # Accept exactly one ASCII letter; symbolic key
                        # names such as "ESCAPE" are not valid registers.
                        if len(key) == 1 and key.isascii() and key.isalpha():
                            self.macro_recording = key
                            self.popup_message(f"recording into {key}")
                        else:
                            self.popup_message("Register must be [a-zA-Z]")

                    self.spawn_popup(record_macro, "Record into which register?")
                else:
                    # end macro recording
                    self.macros[self.macro_recording] = self.macro_sequence
                    self.macro_recording = None
                    self.macro_sequence = []
            elif key == "@":
                # execute macro
                async def ask_macro(self, key):
                    if key in self.macros:
                        macro = self.macros[key]
                        debug(macro)
                        # Replay without redrawing after every key.
                        for k in macro:
                            await self.handle_key(k, redraw = False)
                    else:
                        self.popup_message(f"No such macro @{key}")

                self.spawn_popup(ask_macro, "Execute which macro?")
            elif key == "C":
                self.select_prev_chat()
            elif key == "c":
                self.select_next_chat()
            elif key == "E":
                self.text_emojis ^= True
            elif key == "R":
                await self.mark_read()
            elif key == "d":
                # <n>d: delete message n after confirmation.
                if self.command_box:
                    try:
                        n = int(self.command_box)
                    except ValueError:
                        return
                    if n >= len(self.dialogs[self.selected_chat]["messages"]):
                        self.popup_message("No message by that id.")
                        await self.drawtool.redraw()
                        return
                    async def action_handler(self, key):
                        if key in ["y","Y"]:
                            to_delete = self.dialogs[self.selected_chat]["messages"][n]
                            await to_delete.delete()
                            self.dialogs[self.selected_chat]["messages"].pop(n)
                            self.command_box = ""
                        self.mode = "normal"
                    question = f"Are you really sure you want to delete message {n}? [y/N]"
                    self.spawn_popup(action_handler, question)

                    await self.drawtool.redraw()
            elif key == "e":
                # <n>e: load message n into the input buffer for editing.
                if self.command_box:
                    try:
                        n = int(self.command_box)
                    except ValueError:
                        return
                    messages = self.dialogs[self.selected_chat]["messages"]
                    # Same guard as the delete command: an out-of-range
                    # index would otherwise raise IndexError.
                    if n >= len(messages):
                        self.popup_message("No message by that id.")
                        await self.drawtool.redraw()
                        return
                    self.edit_message = messages[n]
                    self.mode = "edit"
                    self.inputs = emojis.decode(self.edit_message.text)
                    self.command_box = ""
            elif key == "r":
                # <n>r: send the current input buffer as a reply to message n.
                if self.command_box:
                    try:
                        n = int(self.command_box)
                    except ValueError:
                        return
                    messages = self.dialogs[self.selected_chat]["messages"]
                    if n >= len(messages):
                        self.popup_message("No message by that id.")
                        await self.drawtool.redraw()
                        return
                    reply_to = messages[n]
                    s = emojis.encode(self.inputs)
                    reply = await reply_to.reply(s)
                    await self.on_message(reply)
                    self.command_box = ""
                    self.inputs = ""
            elif key == "m":
                # <n>m: show the media attached to message n.
                if self.command_box:
                    try:
                        n = int(self.command_box)
                    except ValueError:
                        return
                    self.command_box = ""
                    await self.show_media(n)
            elif key == "M":
                self.center_selected_chat()
            elif key == "HOME" or key == "g":
                self.select_chat(0)
            elif key == "END" or key == "G":
                self.select_chat(-1)
            elif key == "i":
                self.mode = "insert"
            elif key == "n":
                self.search_next()
            elif key == "N":
                self.search_prev()
            elif key == "/":
                self.mode = "search"
                self.search_box = ""
            elif key == " ":
                self.drawtool.show_indices ^= True
        elif self.mode == "popup":
            action, _ = self.popup
            # I think this could break
            # Restore the previous mode first so the action may change it.
            self.mode = self.modestack.pop()
            await action(self, key)
        elif self.mode == "edit":
            if key == "ESCAPE":
                async def ah(self, key):
                    if key in ["Y", "y", "RETURN"]:
                        edit = await self.edit_message.edit(self.inputs)
                        await self.on_message(edit)
                        # TODO: update message in chat
                        # this on_message call does not work reliably
                        self.mode = "normal"
                    else:
                        self.popup_message("Edit discarded.")
                        self.mode = "normal"
                self.spawn_popup(ah, "Do you want to save the edit? [Y/n]")
            elif key == "LEFT":
                self.insert_move_left()
            elif key == "RIGHT":
                self.insert_move_right()
            elif key == "BACKSPACE":
                self.inputs = self.inputs[0:-1]
            elif key == "RETURN":
                self.inputs += "\n"
            else:
                self.inputs += key
        elif self.mode == "insert":
            if key == "ESCAPE":
                self.mode = "normal"
            elif key == "LEFT":
                self.insert_move_left()
            elif key == "RIGHT":
                self.insert_move_right()
            elif key == "BACKSPACE":
                self.inputs = self.inputs[0:-1]
            elif key == "RETURN":
                self.inputs += "\n"
            else:
                self.inputs += key
        # A completed command consumes the numeric prefix.
        self.command_box = ""
        if redraw:
            await self.drawtool.redraw()
Esempio n. 28
0
                else:
                    location = tweet.user.location
                likes = tweet.favorite_count
                retweets = tweet.retweet_count

                #use TextBlob to do sentiment analysis
                clean_tweet = ' '.join(
                    re.sub("(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)",
                           " ", tweet.text).split())
                analysis = TextBlob(clean_tweet)
                polarity = analysis.polarity
                subjectivity = analysis.subjectivity

                #use emojis to do emoji process
                emoji_all = emojis.get(tweet.text)
                emoji_see = emojis.decode(" ".join(emoji_all))
                emoji_num = emojis.count(tweet.text)

                #previous tweet text data, use unicode to encode, also can .decode("unicode_escape")
                tweet = tweet.text.encode("unicode_escape")

                s = pd.Series({
                    'id': my_id,
                    'date': date,
                    'author': author,
                    'location': location,
                    'likes': likes,
                    'retweets': retweets,
                    'polarity': polarity,
                    'subjectivity': subjectivity,
                    'emoji_all': emoji_all,
video_frame.to_excel('Video_frame.xlsx', index=False, header=True)

video_frame.head(100)

comment_frame = video_frame.loc[:,['videoTitle','textDisplay','likeCount','replyCount']]

for i in comment_frame.index:
    print(comment_frame.loc[i,'textDisplay']+"\n")

"""## Data PreProcessing"""

! pip install emojis

import emojis
comment_frame.textDisplay = comment_frame.textDisplay.apply(lambda x: emojis.decode(x).replace(':', ' ').replace('_', ' '))

# Download NLTK's 'stopwords' resource before it is needed.
import nltk
nltk.download('stopwords')

# imports
from bs4 import BeautifulSoup
import unicodedata
# from contractions import CONTRACTION_MAP # from contractions.py
import re 
import string
import nltk
import spacy
# NOTE(review): the 'en' model shortcut and the parse/tag/entity keyword
# arguments look like an older spaCy API — confirm against the installed
# spaCy version before relying on this call.
nlp = spacy.load('en',parse=True,tag=True, entity=True)
from nltk.tokenize import ToktokTokenizer
# Module-level tokenizer instance, created once here.
tokenizer = ToktokTokenizer()