Example #1
0
def text_contains_emoji(text):
    """Return True when ``emoji.demojize`` would rewrite any part of ``text``.

    Every character is checked on its own first. A two-character string gets
    one additional check intended for flags, which are represented as
    multiple characters:
    https://en.wikipedia.org/wiki/Regional_Indicator_Symbol
    """
    if any(emoji.demojize(symbol) != symbol for symbol in text):
        return True

    if len(text) != 2:
        return False

    # Flag edge case: demojize the two characters joined by a single space.
    spaced_pair = "{} {}".format(text[0], text[1])
    return emoji.demojize(spaced_pair) != spaced_pair
Example #2
0
    def on_status(self, tweet):
        """Set the sender's Yo status from a tweet shaped like ``<handle> <emoji>``.

        The tweet text is split on single spaces and the second token is
        taken as the new status. Tweets with fewer than two or more than
        three tokens are ignored. The sender is told to retry when the token
        is not an emoji, and is sent the authorisation link when no Yo
        access token is stored for their Twitter id or the Yo API does not
        answer with HTTP 200.

        Any exception is caught and printed; nothing is returned.
        """
        try:
            twitter_user_id = str(tweet.user.id)
            mention = u'@' + tweet.user.screen_name
            link_prompt = mention + u' let\'s link your twitter to your Yo Status here: https://yostat.us/twitter/authorize'

            splitted = tweet.text.split(' ')
            # A lone token has no status part to read (splitted[1] would raise).
            if not 2 <= len(splitted) <= 3:
                return

            emoji_status = splitted[1]
            # demojize only changes the text when it actually contains an emoji.
            is_valid_emoji = emoji.demojize(emoji_status) != emoji_status
            if not is_valid_emoji:
                app_api.update_status(status=mention + u' try again with a single emoji: ".YoApp 😂"')
                return

            yo_access_token = redis_store.get('yo.token.for.twitter.user.id:' + twitter_user_id)
            if not yo_access_token:
                app_api.update_status(status=link_prompt)
                return

            response = requests.post('https://api.justyo.co/status/', json={
                'status': emoji_status,
                'access_token': yo_access_token
            })

            if response.status_code == 200:
                # Echo the status that was set; the original concatenated the
                # ``emoji`` module here, which raised TypeError every time.
                app_api.update_status(status=mention + u' your status is now: ' + emoji_status)
            else:
                app_api.update_status(status=link_prompt)
        except Exception as e:
            # Best effort: report the problem and carry on. ``message`` only
            # exists on Python 2 exceptions, so fall back to the exception itself.
            print(getattr(e, 'message', None) or e)
Example #3
0
    def _handle_message(self, msg):
        """Normalise one message row in place and write it as an Org sub-item.

        ``msg`` is mutated: ``number`` is rewritten, ``type`` becomes
        ``'OUTGOING'`` or ``'INCOMING'``, and ``name``, ``verb`` and
        ``handler`` are added. Depending on the parsed arguments the text is
        demojized and/or has its emoji removed. Rows whose text is empty, or
        which ``self._is_ignored`` rejects, are not written.
        """
        phone = '00' + msg['number'].split('@')[0]
        msg['number'] = phone
        msg['name'] = self._numberdict.get(phone, phone)

        # Read the raw flag once: 'type' itself is overwritten just below.
        outgoing = msg['type']
        msg['verb'] = 'to' if outgoing else 'from'
        msg['type'] = 'OUTGOING' if outgoing else 'INCOMING'
        msg['handler'] = self._args.handler

        if msg['text']:
            if self._args.demojize:
                msg['text'] = emoji.demojize(msg['text'])

            if self._args.skip_emoji:
                msg['text'] = re.sub(emoji.get_emoji_regexp(), '', msg['text'])

        # The stored timestamp is divided by 1000 before conversion.
        when = datetime.datetime.fromtimestamp(msg['timestamp'] / 1000)

        props = OrgProperties(data_for_hashing=json.dumps(msg))
        props.add('NUMBER', msg['number'])
        props.add('TYPE', msg['type'])

        rendered = self._args.output_format.format(**msg)

        if not msg['text'] or self._is_ignored(msg):
            return
        self._writer.write_org_subitem(timestamp=OrgFormat.datetime(when),
                                       output=rendered, properties=props)
def get_emoji_counts(master, emoji_counts, candidate):
    """Tally, per day, the emoji-like tokens found in a candidate's tweets.

    ``master`` maps a key to a sequence whose first item is the tweet text
    and whose second item is its date string. ``emoji_counts`` is updated in
    place as ``emoji_counts[candidate][MM-DD-YYYY][token] -> count`` and is
    also returned.
    """
    if candidate not in emoji_counts:
        emoji_counts[candidate] = {}

    for key in master:
        entry = master[key]
        text = entry[0]
        posted = datetime.datetime.strptime(entry[1], '%a %b %d %H:%M:%S %Z %Y')
        day = posted.strftime('%m-%d-%Y')

        # Replace all URLs in the tweet (to avoid confusion with emoticons).
        text = re.sub('htt[^ ]*' ,'URL', text)

        demojized_tokens = [emoji.demojize(piece) for piece in twtokenizer.tokenize(text)]

        for token in demojized_tokens:
            if not re.match(':+[a-z_]*:*',token):
                continue
            per_day = emoji_counts[candidate].setdefault(day, {})
            per_day[token] = per_day.get(token, 0) + 1
    return emoji_counts
Example #5
0
def clean(instring, spaces=True):
    """Normalise a tweet: strip punctuation, name the emojis, lower-case.

    Newlines and every character in ``punctuation`` become spaces, then
    emojis are converted by ``emoji.demojize`` to ``:emoji_text_alias:``
    tokens.

    :param instring: text to clean
    :param spaces: when True, runs of spaces are collapsed to a single
        space; when False, every space is removed
    :return: the cleaned, lower-cased string
    """
    # str.replace returns a new string; the result used to be thrown away,
    # so newlines were never actually replaced.
    instring = instring.replace("\n", " ")
    for mark in punctuation:
        instring = instring.replace(mark, " ")

    # demojize turns emojis into text with this format: :emoji_text_alias:
    instring = emoji.demojize(instring)
    if ":" in instring:  # then the tweet has emojis!
        # Split back-to-back aliases (":a::b:") apart and keep one space
        # between tokens. The previous code joined the tokens with no
        # separator, which glued every word of the tweet together.
        instring = " ".join(instring.replace("::", ": :").split())

    if spaces:
        while "  " in instring:  # remove double spaces
            instring = instring.replace("  ", " ")
    else:
        instring = instring.replace(" ", "")  # remove all spaces
    return instring.lower()
Example #6
0
def comment_image(browser, comments):
  """Post a randomly chosen comment on the image currently open in ``browser``.

  One entry of ``comments`` is picked at random and passed through
  ``emoji.demojize`` followed by ``emoji.emojize(..., use_aliases=True)``.
  The comment box is looked up as a textarea first and as an input second.
  When neither is found a warning is printed and nothing is submitted.

  Always returns 1.
  """
  rand_comment = choice(comments)
  rand_comment = emoji.demojize(rand_comment)
  rand_comment = emoji.emojize(rand_comment, use_aliases=True)

  comment_input = browser.find_elements_by_xpath('//textarea[@placeholder = "Add a comment…"]')
  if not comment_input:
    comment_input = browser.find_elements_by_xpath('//input[@placeholder = "Add a comment…"]')

  if comment_input:
    # Hand the text to the script as an argument instead of splicing it into
    # the JavaScript source: a quote or backslash in the comment used to
    # break (or alter) the generated script.
    browser.execute_script("arguments[0].value = arguments[1] + ' ';", comment_input[0], rand_comment)
    #An extra space is added here and then deleted. This forces the input box to update the reactJS core
    comment_input[0].send_keys("\b")
    comment_input[0].submit()
  else:
    print(u'--> Warning: Comment Action Likely Failed: Comment Element not found')
  print("--> Commented: {}".format(rand_comment.encode('utf-8')))
  sleep(2)

  return 1
    def decrypt(self, encrypted_message):
        """Turn an emoji-encoded message back into text.

        Simple Caesar cipher: the emoji-key index position marks 'a' and the
        rest of the alphabet follows from there. The cipher dict is built as
        for encryption (lazily, via ``define_cipher``) and then inverted so
        emoji aliases map back to characters.

        Returns the decrypted message as a string.
        """
        if self.cipher is None:
            self.cipher = self.define_cipher()

        # Inverted cipher: emoji alias -> plain character.
        alias_to_plain = dict((alias, plain) for plain, alias in self.cipher.items())

        demojized = emoji.demojize(encrypted_message)

        # Combination emojis (e.g. sign_of_the_horns_light_skin_tone) are one
        # alias each. Real spaces are parked as '~' so that the aliases can
        # be separated by turning every ':' into whitespace.
        symbols = demojized.replace(' ', '~').replace(':', ' ').split()

        pieces = []
        for symbol in symbols:
            alias = ':' + symbol + ':'
            if alias in alias_to_plain:
                pieces.append(alias_to_plain[alias])
                continue
            # Not part of the cipher: pass it through, restoring spaces.
            pieces.append(alias.replace('~', ' ').replace(':', ''))

        return ''.join(pieces)
Example #8
0
def test_misc():
    """Scratch routine that prints the same emoji through several spellings.

    Calls ``trans()`` first, then prints escaped and literal forms of a few
    emoji, the results of ``emoji.emojize`` / ``emoji.demojize``, characters
    built from code points with ``chr``, and two ``qw_tuple`` format lines.
    Nothing is asserted or returned.
    """
    trans()
    print(u'\U0001f604'.encode('unicode-escape'))
    print(u'\U0001f604')
    ss = u'\U0001f604'
    # chr() only accepts an integer, so chr(ss[0]) raised TypeError and the
    # rest of the function never ran. Show the code point instead.
    xx = ord(ss[0])
    print("ss({}) xx({})".format(ss, xx))

    print("💗 Growing Heart")
    print(emoji.emojize('Water! :water_wave:'))
    print(emoji.demojize(u'🌊')) # for Python 2.x
# print(emoji.demojize('🌊')) # for Python 3.x.
    print(u"And \U0001F60D")
    print("(-woman) astronaut", chr(int("0001f680", 16)))
    print("woman_astronaut", chr(int("0x0001f680", 0)))

    print("\U0001f483\U0001f3fe")

    # The same emoji + skin-tone pair: separately, then combined.
    print(chr(0x001f483),chr(0x001f3fe))
    print('💃 🏾 ')
    print(chr(0x001f483)+chr(0x001f3fe))
    print('💃🏾 ')
    print(chr(int('1f483',16))+chr(int('1f3fe',16)))
    print('%8s %8s %8s' % qw_tuple('surf wave whitecap'))
    print("('%s', '%s', '%s')" % qw_tuple("surf's-up wave rip-curl"))
Example #9
0
def get_from_local_cache(raw_emoji):
    """Return the cached image path for ``raw_emoji``.

    The cache entry is named after the demojized emoji with its colons
    removed and lives under ``Config.CACHE_DIR``.

    Raises ``CorgiNotFoundException`` when no such entry exists.
    """
    emoji_name = emoji.demojize(raw_emoji).replace(":", "")
    if not os.path.exists(Config.CACHE_DIR + "/" + emoji_name):
        raise CorgiNotFoundException("Corgi not found for emoji: {}"
                                     .format(raw_emoji))

    # Only the part of the name before the first '/' is used for the path.
    top_level = emoji_name.split('/')[0]
    return Config.CACHE_DIR + "/" + top_level + "/01.jpg"
Example #10
0
def workaround_freetds_bug(text):
    """
    Replace emoji in ``text`` so the string can be sent through FreeTDS.

    Emoticons in Instagram posts are outside of the 0xffff unicode range and
    TDS doesn't like this, so the emoji package is used to convert those
    emoticons to text. There are some other emoticons where emoji fails
    (the emoji DB probably needs updating); those are replaced with a
    single space below.

    Returns the converted text.
    """
    # Convert every emoji the library recognises to its text form.
    text = emoji.demojize(text)
    # Blank out, by hand, the characters the docstring says demojize misses.
    text = text.replace(u'🇫󾓮', u' ')
    # NOTE(review): the next two literals look identical in this view; they
    # may differ in code points that do not render - confirm before merging them.
    text = text.replace(u'🇺', u' ')
    text = text.replace(u'🇺', u' ')
    return text
Example #11
0
def file_parser(filepath):
    """Read ``filepath`` and return ``(text, emoji_name)`` pairs.

    Each line is demojized; every match of ``EMOJI_NAMES_PATTERN`` on the
    line yields one pair made of the line with the pattern removed and the
    matched name, both stripped. Lines without a match contribute nothing.
    """
    pairs = []
    with open(filepath, "r") as handle:
        for raw_line in handle.readlines():
            remaining = emoji.demojize(raw_line).rstrip("\n")
            names = EMOJI_NAMES_PATTERN.findall(remaining)

            for name in names:
                remaining = EMOJI_NAMES_PATTERN.sub("", remaining)
                pairs.append((remaining.strip(), name.strip()))

    return pairs
Example #12
0
def put_in_local_cache(corgis):
    """Download each corgi image into its per-emoji cache directory.

    ``corgis`` maps an emoji to an image URL; entries with an empty URL are
    skipped. The image is stored as ``<CACHE_DIR>/<emoji name>/01.jpg``,
    where the emoji name is the demojized emoji without its colons.
    Failures are logged per emoji and do not stop the remaining downloads.
    """
    for raw_emoji, url in corgis.items():
        if not url:
            continue

        emoji_dir = emoji.demojize(raw_emoji).replace(":", "")
        directory = Config.CACHE_DIR + '/' + emoji_dir
        target = directory + "/01.jpg"
        try:
            # Test for the image itself, not just its directory: a download
            # that failed after the directory was created used to leave an
            # empty directory behind and was then never retried.
            if not os.path.exists(target):
                os.makedirs(directory, exist_ok=True)
                urllib.request.urlretrieve(url, target)
        except Exception:
            # Not a bare ``except``: Ctrl-C / SystemExit must still propagate.
            logger.error("Failed on: " + raw_emoji)
	def on_data(self, data):
		"""Route a geotagged tweet to the per-candidate output files.

		The raw payload is demojized and parsed as JSON. Payloads without a
		non-null ``place`` are ignored. Otherwise the tweet text is demojized,
		escaped and cleaned up, printed, and written as one line (together
		with the first coordinate of the place's bounding box) to every
		candidate file whose keyword occurs in the lower-cased text.

		Returns True.
		"""
		demojized = str(emoji.demojize(data))
		decoded = json.loads(str(demojized))

		if 'place' not in decoded or decoded['place'] is None:
			return True

		loc = decoded['place']['bounding_box']['coordinates'][0][0]

		tweet = str(emoji.demojize(decoded['text']).encode("unicode_escape"))
		tweet = tweet[1:].strip("\n").strip("\.")

		# Clean-up passes, applied strictly in this order.
		for old, new in (
			("\n", ". "),
			("\\'", "'"),
			("\\", ""),
			("\\\.", "."),
			("\"", "'"),
			("\\n", ". "),
		):
			tweet = tweet.replace(old, new)
		print (tweet)

		record = '{"tweet": "' + tweet +'", "coordinates": ' + str(loc) + '}\n'
		lowered = tweet.lower()
		if "trump" in lowered:
			trump.write(record)
			trump.flush()
		if "sanders" in lowered or "bernie" in lowered:
			bernie.write(record)
			bernie.flush()
		if "clinton" in lowered:
			clinton.write(record)
			clinton.flush()
		if "rubio" in lowered:
			rubio.write(record)
			rubio.flush()
		if "cruz" in lowered:
			cruz.write(record)
			cruz.flush()
		return True
Example #14
0
    def default_text_handler(self, client, message):
        """
        Default text handler provided by Shawk: print the received message.

        When self.demojize is True the emoji in the message text are
        converted to text before printing; otherwise the raw message text is
        printed unchanged.
        """

        if self.demojize:
            shown_text = emoji.demojize(message.text)
        else:
            shown_text = message.text

        print("{}: {}".format("Shawk received message", shown_text))
Example #15
0
 async def send_reaction(self, reaction):
     """Add ``reaction`` to the message it is linked to.

     The reaction's emoji is passed through ``demojize`` and posted with
     ``reactions.add``. An ``invalid_name`` error is only logged as a
     warning; any other ``slacker.Error`` is re-raised.
     """
     reaction_name = demojize(reaction.emoji)
     _LOGGER.debug("Reacting with: %s", reaction_name)
     try:
         payload = {
             'name': reaction_name,
             'channel': reaction.target,
             'timestamp': reaction.linked_event.raw_event['ts'],
         }
         await self.slacker.reactions.post('reactions.add', data=payload)
     except slacker.Error as error:
         if str(error) != 'invalid_name':
             raise
         _LOGGER.warning('Slack does not support the emoji %s', reaction_name)
def clean_tweets(tweet):
    """Tokenise a tweet into lower-cased, demojized, filtered words.

    URLs are replaced with the placeholder ``URL`` before tokenization. Each
    token is lower-cased and demojized, then dropped when it is found in
    ``punctuation``, is empty, or is found in ``stopwords``.

    Steps that were tried and left disabled in earlier revisions: stemming,
    replacing digits with NUM, squeezing repeated letters, and replacing
    @-references with USER.
    """
    # URLs must be cleaned out before tokenization.
    without_urls = re.sub('htt[^ ]*' ,'URL', tweet)

    kept = []
    for raw_token in twtokenizer.tokenize(without_urls):
        # Lower-case first, then convert any emoji to its word label.
        word = emoji.demojize(raw_token.lower())
        if word in punctuation:
            continue
        if word == '':
            continue
        if word in stopwords:
            continue
        kept.append(word)

    return kept
Example #17
0
File: api.py Project: jmduke/corji
 def get_all(self):
     """Return every known emoji together with its corgi URLs.

     For each key of the spreadsheet (empty keys included) the URLs are taken
     from S3 when ``settings.Config.REMOTE_CACHE_RETRIEVE`` is set, falling
     back to the spreadsheet when S3 has nothing or retrieval is disabled.

     Returns a dict with ``count``, the list of ``emojis`` and the per-emoji
     ``results`` (``urls``, ``emoji``, ``emoji_name``).
     """
     corjis = []
     for this_emoji in google_spreadsheets.keys(include_empty_keys=True):
         corgi_urls = ""
         if settings.Config.REMOTE_CACHE_RETRIEVE:
             try:
                 corgi_urls = s3.get_all(this_emoji)
             except CorgiNotFoundException:
                 # Logger.warn is a deprecated alias (gone in Python 3.13).
                 logger.warning("Corji not found for emoji %s", this_emoji)
         if not corgi_urls:
             corgi_urls = google_spreadsheets.get_all(this_emoji)
         corjis.append({
             "urls": corgi_urls,
             "emoji": this_emoji,
             # Demojized name without the surrounding colons.
             "emoji_name": emoji.demojize(this_emoji).replace(":", ""),
         })
     return {
         "count": len(corjis),
         "emojis": [corji["emoji"] for corji in corjis],
         "results": corjis
     }
Example #18
0
def comment_image(browser, username, comments, blacklist, logger, logfolder):
    """Post a randomly chosen comment on the image currently open in ``browser``.

    One entry of ``comments`` is picked at random, formatted with
    ``username``, and passed through ``emoji.demojize`` followed by
    ``emoji.emojize(..., use_aliases=True)``. After a successful submit the
    'comments' activity counter is updated and, when ``blacklist['enabled']``
    is True, the user is added to the blacklist campaign.

    A missing comment box or an ``InvalidElementStateException`` is logged
    rather than raised. Always returns 1.
    """
    rand_comment = (choice(comments).format(username))
    rand_comment = emoji.demojize(rand_comment)
    rand_comment = emoji.emojize(rand_comment, use_aliases=True)

    open_comment_section(browser)
    comment_input = get_comment_input(browser)

    try:
        if len(comment_input) > 0:
            comment_input[0].clear()
            comment_input = get_comment_input(browser)

            # Hand the text to the script as an argument instead of splicing
            # it into the JavaScript source: a quote or backslash in the
            # comment used to break (or alter) the generated script.
            browser.execute_script(
                "arguments[0].value = arguments[1] + ' ';",
                comment_input[0], rand_comment)
            # An extra space is added here and then deleted.
            # This forces the input box to update the reactJS core
            comment_input[0].send_keys("\b")
            comment_input = get_comment_input(browser)
            comment_input[0].submit()
            update_activity('comments')
            if blacklist['enabled'] is True:
                action = 'commented'
                add_user_to_blacklist(
                    browser, username, blacklist['campaign'], action, logger, logfolder
                )
        else:
            logger.warning('--> Warning: Comment Action Likely Failed:'
                           ' Comment Element not found')
    except InvalidElementStateException:
        logger.info('--> Warning: Comment Action Likely Failed: Probably InvalidElementStateException')

    logger.info("--> Commented: {}".format(rand_comment.encode('utf-8')))
    sleep(2)

    return 1
Example #19
0
def parse_message(lines):
    """ Split each chat line into date, person and message using a regex
    :param lines: list of lines to parse
    :return: list of ``(date, person, message)`` tuples; lines that do not
             match ``MESSAGE_REGEX`` are left out
    """
    parsed = []
    for raw_line in lines:
        # Emojis are converted to their text representation for easier handling
        found = re.match(MESSAGE_REGEX, emoji.demojize(raw_line))
        if found is None:
            continue

        body = found.group('message')
        modifier = re.match(REMOVE_MODIFIERS, body)
        if modifier:
            # Keep what surrounds the modifier (groups 1 and 3).
            body = modifier.group(1) + modifier.group(3)

        # Reorder the date from DD/MM/YY to YY/MM/DD for easier sorting
        ddmmyy = found.group('day')
        sortable_date = '/'.join((ddmmyy[6:], ddmmyy[3:5], ddmmyy[0:2]))
        parsed.append((sortable_date, found.group('person'), body))
    return parsed
Example #20
0
    def on_status(self, status):
        """Store an English, non-retweet tweet under each emoji it contains.

        The tweet is demojized, the emoji names matched by
        ``EMOJI_NAMES_PATTERN`` are collected, and the text with those names
        removed is pushed to the Redis list ``emoji-ml::<emoji name>`` once
        per emoji found. Tweets whose language cannot be detected are
        treated as English. Tweets left with no text are skipped.
        """
        # TODO: avoid duplicate tweets

        tweet = status.text
        if status.retweeted or 'RT @' in tweet:
            return

        try:
            language = lang(tweet)
        except LangDetectException:
            language = 'en'
        if language != 'en':
            return

        # Strip the names from the *demojized* text: the pattern matches
        # emoji names, which do not occur in the raw tweet.
        demojized = emoji.demojize(tweet)
        extracted_emojis = EMOJI_NAMES_PATTERN.findall(demojized)
        cleaned_text = EMOJI_NAMES_PATTERN.sub('', demojized).strip()

        # Only tweets that still have text are worth storing; the test used
        # to be inverted and let through nothing but blank strings.
        if not cleaned_text:
            return

        for emoji_name in extracted_emojis:
            cleaned_emoji = emoji_name.replace(':', '')
            print(cleaned_text)
            redis.rpush('emoji-ml::{}'.format(cleaned_emoji),
                        cleaned_text)
Example #21
0
    def parse(self, tweet):
        """Update ``self.stats`` with the pictures, hashtags, URLs and emoji of one tweet."""
        self.stats['totalTweets'] += 1
        entities = tweet['entities']

        # Pictures: attached media, or whatever _is_instagram accepts.
        has_picture = 'media' in entities or self._is_instagram(tweet)
        if has_picture:
            self.stats['tweetsWithPictures'] += 1

        # Hashtags are counted by their text.
        for tag in entities['hashtags']:
            self._increase_count(tag['text'], self.stats['hashtags'])

        # URLs are counted by the network location of the expanded URL.
        links = entities['urls']
        if links:
            self.stats['tweetsWithURL'] += 1
            for link in links:
                host = urlparse(link['expanded_url']).netloc
                self._increase_count(host, self.stats['urls'])

        # Emoji are counted by their demojized form.
        found = self.emoji_regex.findall(tweet['text'])
        if found:
            self.stats['tweetsWithEmoji'] += 1
            for symbol in found:
                self._increase_count(demojize(symbol), self.stats['emoji'])
 def replace_emojis(self, text):
     """Demojize ``text`` and put a space between back-to-back emoji names."""
     demojized = emoji.demojize(text)
     return demojized.replace('::', ': :')
def emoji(origin): 
    """Rewrite the emojis in ``origin`` using the ``EMOJI_CARACTER`` mapping.

    ``origin`` is demojized, adjacent emoji names are separated, and the
    result is split on whitespace. Each word is compared with the keys of
    ``EMOJI_CARACTER``; matching keys are replaced by their mapped values.
    The pieces are joined with single spaces.

    Returns the rewritten string, or None when it equals ``origin``. Any
    exception is printed and the function then returns None implicitly.
    """
    try:
        # This local import rebinds ``emoji`` to the package inside the
        # function body, shadowing the function's own name.
        import emoji
        s = emoji.demojize(origin)
        s = s.replace('::', ': :')
        lista_texto = s.split()
        print(lista_texto)
        lista_demoj=[]
        for palavra in lista_texto:            
            # ``parada`` (stop) ends the scan for this word; ``cont`` counts
            # key comparisons across repeated passes over the mapping.
            # NOTE(review): if EMOJI_CARACTER is empty the for loop never
            # runs, ``parada`` is never set and this while loop cannot end.
            parada=False
            cont=0
            while not parada:
                for group in EMOJI_CARACTER.items():
                    cont+=1
                    qtd_emojis=EMOJI_CARACTER.__len__()
                    chave=group[0]
                    valor=group[1]     
                    if chave != palavra:
                        if chave in palavra:
                            # Key embedded in the word: emit the word without
                            # the key, followed by the mapped value.
                            palavra=palavra.split(chave)
                            palavra=''.join(palavra)
                            lista_demoj.append(palavra)
                            lista_demoj.append(valor)
                            #print(lista_demoj)
                            #demoj=''.join(lista_demoj)
                            parada=True
                            break
                        else:
                            if palavra in lista_demoj:
                                # A word already present in the output is
                                # dropped rather than appended again.
                                parada=True
                                break
                            elif palavra==chave:
                                # Unreachable: this branch is only entered
                                # when chave != palavra.
                                lista_demoj.append(valor)
                                parada=True
                                break
                            elif chave not in palavra and cont <= qtd_emojis:
                                # Still within the first pass: try the next key.
                                continue
                            else:        
                                # Reached once ``cont`` exceeds the mapping
                                # size (a second pass): no key matched, so
                                # the word is kept unchanged.
                                lista_demoj.append(palavra)
                                #demoj=''.join(lista_demoj)
                                parada=True
                                break    
                        #print(lista_demoj)
                        #demoj=''.join(lista_demoj)
                        #print(demoj)        
                    else:
                        # The word is exactly a key: emit its mapped value.
                        lista_demoj.append(valor)
                        #print(lista_demoj)
                        #demoj=''.join(lista_demoj)  
                        parada=True
                        break          
        demoj=' '.join(lista_demoj)
        print(origin)
        print(demoj)
        # None signals that nothing changed.
        if demoj == origin:
            demoj=None
            return demoj
        else:   
            return demoj
    except Exception as e:
        print(e)
Example #24
0
sock.settimeout(120)
count = 0.0
rate = 0.0

try:
    # Log in and join the channel.
    sock.connect((server, port))
    sock.send(f'PASS {oauth}\n'.encode('utf-8'))
    sock.send(f'NICK {nickname}\n'.encode('utf-8'))
    sock.send(f'JOIN #{channel}\n'.encode('utf-8'))

    start_time = time.time()

    while True:
        resp = sock.recv(2048).decode('utf-8')
        if not resp:
            # recv() returns no data once the peer has closed the
            # connection; without this check the loop would spin forever.
            break

        if resp.startswith('PING'):
            # Answer keep-alive pings.
            sock.send('PONG\n'.encode('utf-8'))
        else:
            logging.info(demojize(resp))

        # Refresh the rate estimate every fourth iteration.
        if (count % 4 == 0):
            end_time = time.time()
            elapsed = end_time - start_time
            if elapsed > 0:
                # Guard against a zero interval on coarse clocks.
                rate = 4 / elapsed
            start_time = end_time

        logging.info(f"Calls = {count}, Messages Sent Per Second = {rate}")
        count += 1
finally:
    # Used to be unreachable after ``while True``; now runs on close,
    # timeout or any other error.
    sock.close()
Example #25
0
# Visit every link and collect the post information
for link in target_links:
    # Locate the target data and preprocess it into the shape we need
    driver.get(link)
    driver.implicitly_wait(5)
    time.sleep(1)
    post_date = driver.find_elements_by_tag_name('time')[-1].get_attribute("datetime").split('T')[0]
    try:
        number_of_like = driver.find_element_by_class_name('Nm9Fw').text.split(' ')[1][:-1].replace(',','')
    except Exception:
        # Fall back to the second class name when the first lookup or its
        # parsing fails. Not a bare ``except``: Ctrl-C must still stop the run.
        number_of_like = driver.find_element_by_class_name('vcOH2').text.split(' ')[1][:-1].replace(',', '')
    posting = driver.find_elements_by_class_name('C4VMK')[0].text.split('\n')[2:-1]
    posting_text = ''
    for text in posting:
        text = emoji.demojize(text)
        # Quotes are removed because the text is embedded in the SQL string below.
        text = text.replace("'", '')
        text = text.replace('"', '')
        posting_text = posting_text + ' ' + text
    # print(date, number_of_like, posting)

    # Save the data to the DB
    # NOTE(review): this query is assembled by string formatting from scraped
    # text. Switch to a parameterized cursor.execute(sql, params) once the
    # driver's placeholder style is confirmed.
    query_for_insert_data = f"INSERT INTO postings(link, post_date, number_of_like, posting) VALUES ('{link}', '{post_date}', {number_of_like}, '{posting_text}');"
    try:
        cursor.execute(query_for_insert_data)
    except Exception:
        # Show which post failed and carry on with the next link.
        print(link)
        print(query_for_insert_data)
    connection.commit()
    # break
Example #26
0
def test_demojize_name_only_no_space():
    """Every name in ``emoji.EMOJI_UNICODE`` must survive emojize -> demojize."""
    for original_name in emoji.EMOJI_UNICODE:
        as_unicode = emoji.emojize(original_name, False, True)
        recovered = emoji.demojize(as_unicode, True)
        assert original_name == recovered, "%s != %s" % (original_name, recovered)
Example #27
0
def test_shortcut_translation():
    """Each shortcut must demojize to the value registered for it in SHORTCUTS."""
    for shortcut, expected in emoji.shortcuts.SHORTCUTS.items():
        actual = emoji.demojize(shortcut, use_shortcuts=True)
        # The shortcut has to be translated at all...
        assert actual != shortcut
        # ...and translated to the registered value.
        assert expected == actual, "%s != %s" % (expected, actual)
Example #28
0
def unicode_to_name(e):
    """Return ``e.name`` passed through ``emoji.demojize``."""
    raw_name = e.name
    return emoji.demojize(raw_name)
Example #29
0
def echo(update,context): 
    """Enforce the chat's configured "English day" on an incoming message.

    Looks up ``english_day`` for the chat in the ``setting`` table. When the
    chat has a setting, the message text (or caption) is stripped of
    mentions, URLs, quoted passages, anything from the first "/" on a line
    and all-caps words, and its language is detected. If today is the
    configured weekday and the language is not English, the sender is
    warned; repeated offences on the same date increase a per-user counter
    in the ``blacklist`` table until the message is deleted (admins and
    creator) or the sender is restricted for 24 hours.

    The whole lookup runs while holding the module-level ``lock``.
    """
    

    bot             = context.bot
    
    chat            = update.effective_chat  # type: Optional[Chat]
    # user            = update.effective_user  # type: Optional[User]
    message         = update.effective_message  # type: Optional[Message]    
    chat_id         = message.chat.id
    chat_type       = message.chat.type
    message_id      = message.message_id
    from_user_name  = message.from_user.username
    from_user_id    = message.from_user.id    
    member          = chat.get_member(from_user_id)
    # date            = message.date
    # (A disabled experiment lived here: it read the media attached to the
    # replied-to message and updated thumb_id / image_size in the ``media``
    # table with a parameterized UPDATE.)
    
    lock.acquire(True)
    try:        
        # NOTE(review): this and the following statements are built with %
        # interpolation from values taken from the update; consider
        # parameterized queries.
        sql             = "SELECT english_day FROM setting WHERE chat_id = '%s'"%chat_id
        # presumably eksekusi returns (rows, row_count) - confirm against its definition
        bar, jum        = eksekusi(sql)
        if jum == 0:
            # No setting row for this chat: nothing to enforce.
            pass
        else:               
            try:                   
                translator = Translator()                
                try:
                    # Drop @mentions and URLs from the message text.
                    message = re.sub(r"(?:\@|https?\://)\S+", "", message.text.encode().decode('utf-8'))                    
                except:
                    # Any failure above (e.g. no text) falls back to the caption.
                    if not message.caption:
                        # Returning here still releases the lock via ``finally``.
                        return
                    elif message.caption ==None:
                        # Unreachable: a None caption is already handled by
                        # the ``not message.caption`` test above.
                        message = "this is caption"
                    else:
                        message = re.sub(r"(?:\@|https?\://)\S+", "", message.caption.encode().decode('utf-8'))
                        # message = message.caption.encode('ascii', 'ignore').decode('ascii')
                # Remove double-quoted passages, everything from the first
                # "/" to the end of a line, and words of two or more
                # capitals/dots (optionally followed by "s").
                message = re.sub(r'".*?"', "", message)
                message = re.sub(r'/.*', "", message)
                message = re.sub(r"\b[A-Z\.]{2,}s?\b", "", message)
                try:
                    # Language is detected on the demojized text.
                    a           = translator.detect(emoji.demojize(message)).lang
                    sekarang    = datetime.datetime.now()
                    tanggal     = '{:%Y-%m-%d}'.format(sekarang)
                    # Abbreviated weekday name of today, compared with the stored english_day.
                    hari        = datetime.datetime.strftime(sekarang.date(),"%a")
                    if hari == bar[0][0] and a != 'en':
                        cek = "SELECT user_id, mute FROM blacklist WHERE chat_id = '%s' AND user_id = '%s' AND tanggal = '%s'"%(chat_id,from_user_id,tanggal)
                        # From here on ``bar``/``jum`` hold the blacklist
                        # rows, so bar[0][1] is the user's ``mute`` counter.
                        bar, jum = eksekusi(cek)
                        if jum == 0:
                            # First offence today: record the user and send
                            # a random reply from ``teks``.
                            infut = "INSERT INTO blacklist (chat_id, chat_type, user_id, user_name, mute,tanggal) VALUES ('%s','%s','%s','%s',0,'%s')"%(chat_id, chat_type, from_user_id, from_user_name,tanggal)
                            cur.execute(infut)
                            db.commit()
                            bot.send_message(chat_id,  random.choice(teks), reply_to_message_id=message_id)
                        elif jum != 0 and bar[0][1] < 3:
                            # Repeat offence: bump the counter and tell the
                            # user how many are left.
                            infut = "UPDATE blacklist SET mute = mute+1 WHERE chat_id = '%s' AND user_id = '%s' AND tanggal = '%s'"%(chat_id, from_user_id,tanggal)
                            cur.execute(infut)
                            db.commit()
                            # Computed from the counter value read before the UPDATE.
                            sisa = 2-bar[0][1]
                            if sisa == 0:
                                if member.status == 'administrator' or member.status == 'creator':
                                    bot.send_message(
                                        chat_id, 
                                        'Your next-non-english chat will be deleted.', 
                                        reply_to_message_id=message_id)
                                else:
                                    bot.send_message(
                                        chat_id, 
                                        'Your next-non-english chat will make you muted to this group for 24 hours.', 
                                        reply_to_message_id=message_id)
                            else:
                                bot.send_message(
                                        chat_id, 
                                        'You have %s remaining'%(sisa), 
                                        reply_to_message_id=message_id)
                        elif jum!=0 and bar[0][1]==3:
                            # Limit reached: admins/creator get the message
                            # deleted, everyone else is restricted.
                            if member.status == 'administrator' or member.status == 'creator':
                                try:
                                    update.effective_message.delete()
                                except:
                                    bot.send_message(
                                        chat_id, 
                                        'Gak bisa di delete nih', 
                                        reply_to_message_id=message_id)
                            elif member.can_send_messages is None or member.can_send_messages:
                                mutetime    = datetime.datetime.now()+datetime.timedelta(hours=24)
                                # Note: this also rebinds ``sekarang`` to the formatted string.
                                tanggalmute = sekarang = '{:%Y-%m-%d %H:%M:%S}'.format(mutetime)
                                infut       = "UPDATE blacklist SET mute_sampe_tanggal = '%s' WHERE chat_id = '%s' AND user_id = '%s' AND tanggal = '%s'"%(tanggalmute,chat_id, from_user_id,tanggal)
                                cur.execute(infut)
                                db.commit()
                                bot.restrict_chat_member(chat_id, from_user_id, until_date=mutetime, can_send_messages=False)
                                bot.send_message(chat_id, "Restricted until {}!".format(tanggalmute), reply_to_message_id=message_id) 
                            else:
                                bot.send_message(chat_id, "Already muted.", reply_to_message_id=message_id)
                except:
                    # Any failure in detection or enforcement is answered
                    # with a generic reply.
                    bot.send_message(chat_id,  "Im stupid bot", reply_to_message_id=message_id)
            except:
                # Failures outside the inner try are reported with the full traceback.
                bot.send_message(chat_id,str(traceback.format_exc()), reply_to_message_id=message_id)
    finally:
        lock.release()        
    """
    Return whether the string can be interpreted as a date.

    :param string: str, string to check for date
    :param fuzzy: bool, ignore unknown tokens in string if True
    """
    try:
        parse(string, fuzzy=fuzzy)
        return True

    except ValueError:
        return False


# Iterate through all the files and send messages in whatsapp
for file in os.listdir(chatsFolder):
    filename = 'B-{}'.format(str(file[19:-4]))
    whatsapp.selectContact(filename.strip())
    # Read the whole chat and close the file before it is renamed below
    # (the handle used to stay open).
    with open(chatsFolder + file, 'r') as message_file:
        Lines = message_file.readlines()
    message = ""
    for line in Lines:
        if is_date(line[0:8]):
            # A line starting with a date begins a new message: send the
            # one collected so far (nothing has been collected before the
            # first dated line).
            if message:
                whatsapp.sendMessage(emoji.demojize(message, delimiters=("", "")))
            message = line
        else:
            # Continuation line of a multi-line message.
            message = message + line
    # The final message has no following dated line to trigger its send;
    # it used to be dropped.
    if message:
        whatsapp.sendMessage(emoji.demojize(message, delimiters=("", "")))

    os.rename(chatsFolder + file, restoredFolder + filename)
def add_more_posts(companies, addDirection, addDate):
    """Scrape additional Instagram posts and append them to each company's CSV.

    For every company the existing ``<company>.csv`` is read to find the
    oldest and newest stored ``date_utc``. Posts are then fetched through
    instaloader and every post inside the requested window is appended to
    the same CSV file as one row.

    Args:
        companies: iterable of Instagram usernames; each must already have a
            ``<username>.csv`` file in the output directory.
        addDirection: ``'beginning'`` to add posts between ``addDate`` and the
            oldest stored post, or ``'end'`` to add posts between the newest
            stored post and ``addDate``.
        addDate: datetime bounding the other side of the window.

    Raises:
        ValueError: if ``addDirection`` is neither ``'beginning'`` nor
            ``'end'``. Checked up front, before any file or network access.
    """
    if addDirection not in ('beginning', 'end'):
        raise ValueError(
            "addDirection must be 'beginning' or 'end', got %r" % (addDirection,))

    output_path = pathlib.Path('../../../data/all_instagram_posts')
    fieldnames = ["shortcode", "username", "date_utc", "is_video",
                  "is_sponsored", "hashtags", "mentions", "caption", "video_view_count",
                  "video_length", "likes", "comments", "location_name", "location_latlong"]

    for company in companies:
        ## import company csv as dataframe
        csvName = company + '.csv'
        file_path = output_path.joinpath(csvName)
        df = pd.read_csv(file_path)

        ## get earliest and latest date
        oldestDate = pd.to_datetime(df['date_utc'].min())  ## earliest date in data
        recentDate = pd.to_datetime(df['date_utc'].max())  ## most recent date in data

        # SINCE is the newer bound and UNTIL the older one (see the
        # dropwhile/takewhile pair below).
        if addDirection == 'beginning':
            SINCE, UNTIL = oldestDate, addDate
        else:
            SINCE, UNTIL = addDate, recentDate

        posts = instaloader.Profile.from_username(instagram.context, company).get_posts()

        # Skip posts newer than SINCE, then keep posts while they are newer
        # than UNTIL. NOTE(review): this assumes get_posts() yields newest
        # posts first - confirm against the instaloader documentation.
        scraped = 0
        for post in takewhile(lambda p: p.date > UNTIL, dropwhile(lambda p: p.date > SINCE, posts)):
            scraped += 1

            print(post.date)
            print("...scraping info for post %i, %s" % (scraped, company))

            # NOTE(review): the .encode(...) calls store bytes objects, which
            # the csv module writes via str(), i.e. as "b'...'" on Python 3.
            # Left unchanged so new rows match the rows already in the file.
            post_info = {
                "shortcode": post.shortcode,
                "username": company,
                "date_utc": post.date_utc.strftime('%Y-%m-%d %H:%M:%S.%f'),
                "is_video": "yes" if post.is_video else "no",
                "is_sponsored": post.is_sponsored,
                "hashtags": (",".join(post.caption_hashtags)).encode('utf-8', errors='ignore'),
                "mentions": (",".join(post.caption_mentions)).encode('utf-8', errors='ignore'),
                "caption": (emoji.demojize(post.caption)).encode('utf-8', errors='ignore') if post.caption else "",
                "video_view_count": post.video_view_count if post.is_video else 0,
                "video_length": post.video_duration if post.is_video else 0,
                "likes": post.likes,
                "comments": post.comments,
                "location_name": (post.location.name).encode('utf-8', errors='ignore') if post.location else "",
                "location_latlong": " ".join((str(post.location.lat), str(post.location.lng))) if post.location else ""
                }

            # Append row by row so already-scraped posts survive a failure
            # later in the run. newline='' lets the csv module control line
            # endings (avoids blank rows on Windows).
            with open(file_path, 'a', newline='') as csv_file:
                writer = csv.DictWriter(csv_file, fieldnames=fieldnames, delimiter=',')
                writer.writerow(post_info)

        print("...scraped %i posts for %s" % (scraped, company))
    print("Done scraping!")
Example #32
0
"""
 @author    : macab (macab@debian)
 @file      : emoji
 @created   : Wednesday Mar 20, 2019 23:05:24 IST
"""

import emoji

if __name__ == "__main__":
    # Faces written as raw code-point escapes, printed one per line.
    escaped_faces = (
        "\U0001f600",  # grinning face
        "\U0001F606",  # grinning squinting face
        "\U0001F923",  # rolling on the floor laughing
    )
    for face in escaped_faces:
        print(face)

    # Same idea through the emoji package: alias -> glyph, then glyph -> alias.
    from_alias = emoji.emojize(":grinning_face_with_big_eyes:")
    print(from_alias)
    to_alias = emoji.demojize('😍')
    print(to_alias)
    def clean_tweet(self, text):
        """Normalise a raw tweet into plain, whitespace-separated text.

        The steps run in a fixed order: unicode repair, HTML stripping,
        contraction unpacking, replacement of URLs / e-mails / phone numbers /
        numbers / currency symbols, accent removal, conversion of smileys
        (via ``self.SMILEY``) and emojis to words, splitting at capital
        letters and underscores, punctuation and digit removal, removal of
        words shorter than three characters, and whitespace normalisation.

        Args:
            text: the raw tweet text.

        Returns:
            The cleaned text.
        """
        # Repair broken unicode.
        text = ftfy.fix_text(text)

        # Keep only the text content of any HTML markup.
        text = BeautifulSoup(text, features='lxml').getText()

        # Un-pack contractions.
        text = preprocess.unpack_contractions(text)

        # Replace URLs, e-mails, phone numbers, numbers and currency symbols.
        text = preprocessing.replace_urls(text)
        text = preprocessing.replace_emails(text)
        text = preprocessing.replace_phone_numbers(text)
        text = preprocessing.replace_numbers(text)
        text = preprocessing.replace_currency_symbols(text)

        # Remove accents.
        text = preprocessing.remove_accents(text)

        # Convert smileys and emojis to text. The colons that demojize puts
        # around emoji names are turned into spaces.
        words = text.split()
        reformed = [
            self.SMILEY[word] if word in self.SMILEY else word
            for word in words
        ]
        text = " ".join(reformed)
        text = emoji.demojize(text)
        text = text.replace(":", " ")
        text = ' '.join(text.split())

        # Split attached words at every capital letter. The second
        # alternative keeps whatever precedes the first capital letter; with
        # only '[A-Z][^A-Z]*' that leading text (and any tweet without a
        # capital letter) was silently thrown away.
        text = ' '.join(re.findall('[A-Z][^A-Z]*|[^A-Z]+', text))

        # Split underscore-joined words.
        text = text.replace('_', ' ')

        # Remove punctuation.
        text = preprocessing.remove_punctuation(text)

        # Remove digits.
        text = re.sub(r'\d', '', text)

        # Remove words shorter than 3 characters.
        text = re.sub(r'\b\w{1,2}\b', '', text)

        # Normalise whitespace (also collapses the double spaces introduced
        # by the joins above).
        text = preprocessing.normalize_whitespace(text)

        return text
Example #34
0
def clean_text(val):
    """Run *val* through the cleaning pipeline and return it single-spaced.

    The value is passed, in order, through ``misspelled_correction``,
    ``p.clean``, ``emoji.demojize`` and ``punctuation``; the result is split
    on whitespace and re-joined with single spaces.
    """
    corrected = misspelled_correction(val)
    cleaned = p.clean(corrected)
    demojized = emoji.demojize(cleaned)
    words = punctuation(demojized).split()
    return ' '.join(words)
Example #35
0
    punctuations = '''()-[]{};:'"\,<>./@#$%^&_~'''

    for x in val.lower():
        if x in punctuations:
            val = val.replace(x, " ")
    return val


# In[8]:

# Try the punctuation helper on a sample string (the return value is not
# stored; it is only displayed when run as a notebook cell).
punctuation("test ombak@ #ldfldlf??? !! ")

# In[9]:

# For every entry of data.clean_content: turn emojis into their text names,
# apply the punctuation helper and collapse runs of whitespace.
data.clean_content = data.clean_content.apply(
    lambda x: ' '.join(punctuation(emoji.demojize(x)).split()))

# In[10]:


def clean_text(val):
    """Return *val* after spelling correction, cleaning and de-emojizing.

    Applies ``misspelled_correction``, ``p.clean``, ``emoji.demojize`` and
    ``punctuation`` in that order, then collapses all whitespace runs into
    single spaces.
    """
    stage = misspelled_correction(val)
    stage = p.clean(stage)
    stage = punctuation(emoji.demojize(stage))
    return ' '.join(stage.split())


# In[11]:

clean_text("saya punya ide💡 bag00ss@@ ! ? ")
     json.dump(e_codes_json, f)


# In[ ]:

def get_emoji_counts(master):
    """Count emoji occurrences per day over the tweets in *master*.

    Args:
        master: table indexed as ``master.loc[i, column]`` for ``i`` in
            ``range(len(master))``, providing the columns ``statusText`` and
            ``statusCreatedAt``. The latter must be a string in the format
            ``'%a %b %d %H:%M:%S %Z %Y'``.

    Returns:
        dict mapping a ``YYYY_MM_DD`` day string to a dict of
        ``:emoji_name:`` -> number of occurrences on that day.
    """
    emoji_counts = {}
    for i in range(0, len(master)):
        tweet = master.loc[i, 'statusText']
        created = master.loc[i, 'statusCreatedAt']
        created = datetime.datetime.strptime(created, '%a %b %d %H:%M:%S %Z %Y')
        date_ft = created.strftime('%Y_%m_%d')

        tokens = twtokenizer.tokenize(tweet)
        # Drop URL tokens. The previous code filtered an undefined
        # ``cleanWords`` list here, which raised NameError.
        tokens = [tok for tok in tokens if tok[0:3] != 'htt']
        tokens = [emoji.demojize(tok) for tok in tokens if tok != ':']

        for token in tokens:
            # demojize renders an emoji as ':name:'. The previous pattern
            # ':+*:' was not a valid regular expression (multiple repeat).
            if re.match(r':[^:\s]+:', token):
                day_counts = emoji_counts.setdefault(date_ft, {})
                day_counts[token] = day_counts.get(token, 0) + 1
    return emoji_counts


# In[ ]:
Example #37
0
def test_smile_emoji2():
    """Text already containing the ``:smile:`` alias must pass through demojize unchanged."""
    expected = u'(test asdad :smile:)'
    actual = emoji.demojize(u'(test asdad :smile:)', use_shortcuts=True)
    assert actual == expected
def transTwts(configs, dest):
    """Poll the tweet table forever and print translations of the newest tweet.

    Each polling round selects the newest row of ``tweets.tweet``, strips
    t.co links, ``#`` signs and @mentions from the text, converts emojis to
    their text names and prints the translation into ``dest`` and into
    English next to the original text. The function sleeps 20 seconds
    between rounds, or 10 seconds when the query returned nothing.

    Args:
        configs: mapping with a ``'database'`` section providing ``host``,
            ``user``, ``password`` and ``db``; double quotes are stripped
            from each value.
        dest: target language passed to ``Translator.translate``.

    The function never returns normally. Any ``Exception`` is printed and,
    after a 10 second pause, the whole procedure restarts. The restart is a
    loop rather than a recursive call, so repeated failures cannot exhaust
    the interpreter's recursion limit.
    """
    select_sql = r"SELECT tid, text, url1, tweet_lang  FROM tweets.tweet  order by tid desc   limit 1 ;"

    while True:
        print('transTwts() started. ')
        try:
            sql_twts = db_op.SQL_tweets()

            host = configs['database']['host'].replace('"', '')
            user = configs['database']['user'].replace('"', '')
            password = configs['database']['password'].replace('"', '')
            db = configs['database']['db'].replace('"', '')
            database_paras = db_op.Database_parameters(host, user, password, db)

            translate = Translator()

            while True:
                # Open a fresh connection per round and always release it,
                # even when the query fails.
                con = sql_twts.connect2database(database_paras)
                try:
                    results = sql_twts.select_db(select_sql, con)
                finally:
                    con.close()

                if len(results) < 1:
                    time.sleep(10)
                    continue

                df = pd.DataFrame(list(results), columns=['tweetID', 'text', 'url1', 'tweet_lang'])
                df = df.sort_values(by=['tweetID'], ascending=False)
                # Plain lists keep original text, URL and language aligned by
                # position with the translations below. (The removed
                # ``DataFrame.ix`` looked rows up by label, which no longer
                # matches positions once the frame has been sorted.)
                originals = list(df['text'])
                urls = list(df['url1'])
                langs = list(df['tweet_lang'])

                texts = []
                for text in originals:
                    text = re.sub(r'https{0,1}:\/\/t.co\/[a-zA-Z0-9]+', '', text)
                    text = re.sub(r'#', '', text)
                    text = re.sub(r'@[a-zA-Z0-9_]+', '', text)
                    texts.append(emoji.demojize(text))

                trans = translate.translate(texts, dest)
                traneEn = translate.translate(texts, 'en')
                translateds = [item.text for item in trans]
                translateds_En = [item.text for item in traneEn]

                for original, url, lang, in_english, in_dest in zip(
                        originals, urls, langs, translateds_En, translateds):
                    lang = lang.strip()
                    try:
                        lang_full = LANGUAGES[lang].capitalize()
                    except KeyError:
                        # Language code not present in LANGUAGES.
                        lang_full = 'Unknown'
                    print(r'Tweets translation (original language is {}): {} {}'.format(lang_full, original, url.replace(';', '  ')))
                    print('Tweets translation (English): ', in_english)
                    print('Tweets translation (Chinese): ', in_dest)
                    print('')

                time.sleep(20)

        except Exception as e:
            print("Error in transTwts(): ", str(e))
            time.sleep(10)
Example #39
0
def test_smile_emoji2():
    """demojize with shortcuts enabled must leave an existing ``:smile:`` alias untouched."""
    sample = u'(test asdad :smile:)'
    converted = emoji.demojize(sample, use_shortcuts=True)
    assert converted == u'(test asdad :smile:)'
Example #40
0
def test_shortcuts():
    """With shortcuts enabled, each ``:S`` is expected to come back as ``:confounded:``."""
    source = u'\U0001F376 :S :S :S'
    expected = u':sake_bottle_and_cup: :confounded: :confounded: :confounded:'
    assert emoji.demojize(source, no_space=True, use_shortcuts=True) == expected
Example #41
0
def test_shortcuts():
    """Check demojize output for an emoji followed by three ``:S`` shortcuts."""
    demojized = emoji.demojize(
        u'\U0001F376 :S :S :S', no_space=True, use_shortcuts=True)
    wanted = u':sake_bottle_and_cup: :confounded: :confounded: :confounded:'
    assert demojized == wanted
Example #42
0
if __name__ == '__main__':
    trans()

    # The same character shown as its escape sequence and as the glyph.
    ss = u'\U0001f604'
    print(ss.encode('unicode-escape'))
    print(ss)

    # Escape the non-ASCII part of a sentence; the result is kept in
    # ``teststring`` but never printed.
    teststring = "I am happy \U0001f604".encode('unicode_escape')

    print("💗 Growing Heart")
    wave_text = emoji.emojize('Water! :water_wave:')
    print(wave_text)
    wave_name = emoji.demojize(u'🌊')
    print(wave_name)
    print(u"And \U0001F60D")

    # Code point 0x1f680 parsed from hex text in two ways: explicit base 16
    # and base 0 (prefix-detected).
    from_base16 = chr(int("0001f680", 16))
    print("(-woman) astronaut", from_base16)
    from_prefixed = chr(int("0x0001f680", 0))
    print("woman_astronaut", from_prefixed)

    print("\U0001f483\U0001f3fe")

    # Code points 0x1f483 and 0x1f3fe printed separately, then concatenated.
    first_char = chr(0x001f483)
    second_char = chr(0x001f3fe)
    print(first_char, second_char)
    print('💃 🏾 ')
    print(first_char + second_char)
    print('💃🏾 ')
    print(chr(int('1f483', 16)) + chr(int('1f3fe', 16)))
Example #43
0
def test_demojize_complicated_string():
    """emojize followed by demojize must reproduce the alias-laden input exactly."""
    constructed = u"testing :baby::emoji_modifier_fitzpatrick_type-3: with :eyes: :eyes::eyes: modifiers :baby::emoji_modifier_fitzpatrick_type-5: to symbols ヒㇿ"
    destructed = emoji.demojize(emoji.emojize(constructed))
    failure_note = "%s != %s" % (constructed, destructed)
    assert constructed == destructed, failure_note
Example #44
0
async def ebay_handle(group, task):
    """Build the eBay custom product report for one task and store it in the DB.

    Steps, as performed below:
      1. Delete rows previously stored for the task in
         ``ebay_product_report_result``.
      2. Build the Elasticsearch query for the task and run it.
      3. Attach a ``category_path`` (up to three levels) to every hit whose
         leaf category is found in ``ebay_category``.
      4. Upsert one result row per hit while accumulating sales/GMV totals.
      5. Mark the task as done (``status`` 1) with the totals and insert a
         success notification for the user.

    If anything in steps 2-5 raises, the exception is logged, the task is
    marked with ``status`` 2 and a failure notification is inserted.

    Args:
        group: not used by this function.
        task: raw task wrapped with ``ANATask``; its ``task_data`` must
            provide ``task_id``, ``user_id``, ``site``, ``index_name``,
            ``result_count`` and ``report_name``.
    """
    hy_task = ANATask(task)
    task_log = [hy_task.task_type, hy_task.task_data]
    # logger.info("connecting")
    task = hy_task.task_data
    # Timestamp shifted by +8 hours (presumably to UTC+8 - confirm).
    time_now = (datetime.now() + timedelta(hours=8)).strftime('%Y-%m-%d %H:%M:%S')
    with engine.connect() as conn:

        # Remove results of any earlier run of the same task.
        del_body = delete(ebay_product_report_result).where(
            ebay_product_report_result.c.task_id == task['task_id'],
        )

        conn.execute(del_body)

        try:
            es = ESBody()
            # Query ES for this task and write the results to the DB.
            search_body = es.create_search(task)
            search_body = await get_permission_es_body(task['user_id'], search_body, task['site'])

            logger.info("========================es请求体================================")
            logger.info(json.dumps(search_body))
            logger.info("========================es请求体================================")

            es_connection = Elasticsearch(hosts=EBAY_ELASTICSEARCH_URL, timeout=ELASTIC_TIMEOUT)

            index_result = await es_connection.search(
                index=task['index_name'],
                body=search_body,
                size=task['result_count'])
            # logger.info(index_result)
            # List of product hits for the report.
            the_es_result = index_result['hits']['hits']
            name_ids = []
            # Collect the leaf category ids of all hits.
            for item in the_es_result:
                # logger.info(item)
                for category_id in item['_source']['leaf_category_id']:
                    name_ids.append(category_id)
            # Look up the category paths for those ids.
            select_category_name = select([
                ebay_category.c.category_name,
                ebay_category.c.category_id,
                ebay_category.c.category_id_path,
                ebay_category.c.category_name_path
            ]).where(
                and_(
                    ebay_category.c.category_id.in_(name_ids),
                    ebay_category.c.site == task['site']
                ))
            cursor_name = conn.execute(select_category_name)
            records_name = cursor_name.fetchall()
            logger.info("=======补全category_path的id========")
            logger.info(name_ids)
            logger.info("===============")
            # Build the category path of each hit.
            for db_info in records_name:
                for category in the_es_result:
                    for low_id in category['_source']['leaf_category_id']:
                        # logger.info(low_id)
                        if low_id == db_info['category_id']:
                            name_list = db_info['category_name_path'].split(':')
                            id_list = db_info['category_id_path'].split(':')
                            complete_list = []
                            category['_source']['category_path'] = []
                            try:
                                # Keep at most the first three path levels.
                                for i in range(3):
                                    complete_list.append({"name": name_list.pop(0), "id": id_list.pop(0)})
                                category['_source']['category_path'].append(complete_list)
                            except Exception as e:
                                # Path shorter than three levels: keep what
                                # was collected so far.
                                logger.info(e)
                                category['_source']['category_path'].append(complete_list)

            # Upsert the products one by one.
            get_result_count = 0
            sum_data = {
                "sold_total": 0,
                "sum_sold_last_3": 0,
                "sum_sold_last_7": 0,
                "sum_sold_last_1": 0,
                "sum_gmv_last_3": 0,
                "sum_gmv_last_7": 0,
                "sum_gmv_last_1": 0
            }
            for item in the_es_result:
                # Accumulate totals and build the product row.
                sum_data['sold_total'] += item['_source']['sold_total']
                sum_data['sum_sold_last_3'] += item['_source']['sold_last_3']
                sum_data['sum_sold_last_7'] += item['_source']['sold_last_7']
                sum_data['sum_sold_last_1'] += item['_source']['sold_last_1']
                sum_data['sum_gmv_last_3'] += item['_source']['gmv_last_3']
                sum_data['sum_gmv_last_7'] += item['_source']['gmv_last_7']
                sum_data['sum_gmv_last_1'] += item['_source']['gmv_last_1']
                result_info = {
                    "task_id": task['task_id'],
                    "item_id": item['_source']['item_id'],
                    "img": item['_source']['img'],
                    "title": emoji.demojize(item['_source']['title']),
                    "site": item['_source']['site'],
                    "brand": item['_source']['brand'],
                    # Constructed above, not delivered by ES.
                    "category_path": str(item['_source']['category_path']),
                    "store_location": item['_source']['store_location'],
                    "item_location": item['_source']['item_location'],
                    "item_location_country": item['_source']['item_location_country'],
                    "seller": item['_source']['seller'],
                    "price": item['_source']['price'],
                    "gmv_last_3_pop": item['_source']['gmv_last_3_pop'],
                    "gmv_last_3": item['_source']['gmv_last_3'],
                    "gmv_last_1": item['_source']['gmv_last_1'],
                    "gmv_last_7": item['_source']['gmv_last_7'],
                    "sold_last_7": item['_source']['sold_last_7'],
                    "sold_last_1": item['_source']['sold_last_1'],
                    "sold_last_3": item['_source']['sold_last_3'],
                    "visit": item['_source']['visit_last_1'],
                    # Conversion rate; 0 when there were no visits.
                    "cvr": item['_source']['sold_last_1'] / item['_source']['visit_last_1'] if item['_source'][
                                                                                                   'visit_last_1'] != 0 else 0,
                    "date": (datetime.now()).strftime('%Y-%m-%d %H:%M:%S'),
                    "update_time": time_now
                }
                # logger.info(result_info)

                # Insert the product row, updating it on a duplicate key.
                ins = insert(ebay_product_report_result)
                insert_stmt = ins.values(result_info)
                on_duplicate_key_stmt = insert_stmt.on_duplicate_key_update(
                    task_id=insert_stmt.inserted.task_id,
                    item_id=insert_stmt.inserted.item_id,
                    img=insert_stmt.inserted.img,
                    title=insert_stmt.inserted.title,
                    site=insert_stmt.inserted.site,
                    brand=insert_stmt.inserted.brand,
                    seller=insert_stmt.inserted.seller,
                    price=insert_stmt.inserted.price,
                    category_path=insert_stmt.inserted.category_path,
                    store_location=insert_stmt.inserted.store_location,
                    item_location=insert_stmt.inserted.item_location,
                    item_location_country=insert_stmt.inserted.item_location_country,
                    gmv_last_3_pop=insert_stmt.inserted.gmv_last_3_pop,
                    gmv_last_3=insert_stmt.inserted.gmv_last_3,
                    gmv_last_1=insert_stmt.inserted.gmv_last_1,
                    gmv_last_7=insert_stmt.inserted.gmv_last_7,
                    sold_last_7=insert_stmt.inserted.sold_last_7,
                    sold_last_1=insert_stmt.inserted.sold_last_1,
                    sold_last_3=insert_stmt.inserted.sold_last_3,
                    visit=insert_stmt.inserted.visit,
                    cvr=insert_stmt.inserted.cvr,
                    date=insert_stmt.inserted.date,
                )
                conn.execute(on_duplicate_key_stmt)
                get_result_count += 1

            # Update the task status: finished successfully.
            logger.info(sum_data)
            ins = update(ebay_custom_report_task)
            ins = ins.values({
                "status": 1,
                "update_time": time_now,
                "get_result_count": get_result_count,
                "product_total": get_result_count,
                "sold_total": sum_data['sold_total'],
                "sum_sold_last_3": sum_data['sum_sold_last_3'],
                "sum_sold_last_7": sum_data['sum_sold_last_7'],
                "sum_sold_last_1": sum_data['sum_sold_last_1'],
                "sum_gmv_last_3": round(sum_data['sum_gmv_last_3'], 2),
                "sum_gmv_last_7": round(sum_data['sum_gmv_last_7'], 2),
                "sum_gmv_last_1": round(sum_data['sum_gmv_last_1'], 2)
            }).where(
                ebay_custom_report_task.c.task_id == task['task_id']
            )
            conn.execute(ins)
            # Add the user notification (success).
            ins_msg = insert(ana_user_msg)
            insert_stmt_msg = ins_msg.values(
                {
                    "user_id": task['user_id'],
                    "msg_id": str(task['user_id']) + str(int(time.time())),
                    "msg_content": "您的Ebay自定义报告" + task['report_name'] + "于" +
                                   time_now + "生成成功,请及时查看!",
                    "create_at": time_now,
                    "status": 0
                }
            )
            conn.execute(insert_stmt_msg)
        except Exception as e:
            logger.info(e)
            # Update the task status: failed. Totals are not written.
            ins = update(ebay_custom_report_task)
            ins = ins.values({
                "status": 2,
                "update_time": time_now,
            }).where(
                ebay_custom_report_task.c.task_id == task['task_id']
            )
            conn.execute(ins)
            # Add the user notification (failure).
            ins_msg = insert(ana_user_msg)
            insert_stmt_msg = ins_msg.values(
                {
                    "user_id": task['user_id'],
                    "msg_id": str(task['user_id']) + str(int(time.time())),
                    "msg_content": "您的Ebay自定义报告" + task['report_name'] + "于" +
                                   time_now + "生成失败,请重新编辑条件或联系网站管理员!",
                    "create_at": time_now,
                    "status": 0
                }
            )
            # The statement used to be built but never executed, so the user
            # was never told about a failed report.
            conn.execute(insert_stmt_msg)
Example #45
0
def form_emoji_dict(s):
    """Append the demojized emojis of *s* to ``emoji_chain``.

    Only characters present in ``emoji.UNICODE_EMOJI`` are kept; they are
    joined with single spaces before being passed to ``emoji.demojize``.
    """
    found = [ch for ch in s if ch in emoji.UNICODE_EMOJI]
    spaced = ' '.join(found)
    emoji_chain.append(emoji.demojize(spaced))
Example #46
0
def comment_image(browser, username, comments, blacklist, logger, logfolder):
    """Post a randomly chosen comment on the currently opened image.

    One entry of ``comments`` is picked at random, formatted with
    ``username`` and normalised by a demojize/emojize round trip before it
    is typed into the comment box and submitted with ENTER.

    Returns:
        ``(True, "success")`` when the comment was sent, otherwise
        ``(False, reason)`` where reason is ``"jumped"`` (quota supervisor),
        ``"commenting disabled"`` (no comment box found) or
        ``"invalid element state"``.
    """
    # Respect the quota supervisor before doing anything.
    if quota_supervisor("comments") == "jump":
        return False, "jumped"

    chosen = random.choice(comments).format(username)
    rand_comment = emoji.emojize(emoji.demojize(chosen), use_aliases=True)

    open_comment_section(browser, logger)
    # Pauses throughout give the page time to settle ("to avoid crash").
    sleep(3)
    comment_input = get_comment_input(browser)

    try:
        if not len(comment_input) > 0:
            logger.warning("--> Comment Action Likely Failed!"
                           "\t~comment Element was not found")
            return False, "commenting disabled"

        sleep(2)
        # Fetch the input again after the pause.
        comment_input = get_comment_input(browser)
        comment_to_be_sent = rand_comment

        sleep(2)
        # Click into the textarea / comment box and type the comment.
        typing = ActionChains(browser).move_to_element(comment_input[0])
        typing = typing.click().send_keys(comment_to_be_sent)
        typing.perform()

        sleep(2)
        # Submit the comment with <enter>.
        submitting = ActionChains(browser).move_to_element(comment_input[0])
        submitting.send_keys(Keys.ENTER).perform()

        update_activity(
            browser,
            action="comments",
            state=None,
            logfolder=logfolder,
            logger=logger,
        )

        if blacklist["enabled"] is True:
            add_user_to_blacklist(username, blacklist["campaign"],
                                  "commented", logger, logfolder)

    except InvalidElementStateException:
        logger.warning("--> Comment Action Likely Failed!"
                       "\t~encountered `InvalidElementStateException` :/")
        return False, "invalid element state"

    logger.info("--> Commented: {}".format(rand_comment.encode("utf-8")))
    Event().commented(username)

    # Sleep for the configured post-comment delay.
    sleep(get_action_delay("comment"))

    return True, "success"
Example #47
0
    def onPressButton(self):
        """React to the button whose caption is the text of the user's message.

        The pressed caption is logged and then compared with
        ``profileButton``, ``buyButton``, ``sellButton``, ``infoButton`` and
        ``supportButton``; the first match decides which reply is sent.
        Captions matching none of them are ignored.
        """
        pressed = self.user.message.text
        logger.info('{} - нажата кнопка {}'.format(self.user.message.chat.id,
                                                   emoji.demojize(pressed)))
        if pressed == self.profileButton:
            # Profile: inline actions plus balance / purchase / sale counters.
            markup = telebot.types.InlineKeyboardMarkup()
            profile_actions = (
                ('🛒 Мои покупки', 'my_purchases'),
                ('💰 Мои продажи', 'my_sales'),
                ('🤝 Пригласить друга', 'invite_message'),
                ('🏷 Ввести код купона', 'enter_coupon_code'),
            )
            for caption, callback in profile_actions:
                markup.add(
                    telebot.types.InlineKeyboardButton(
                        text=caption, callback_data=callback))
            bot.send_message(self.user.message.chat.id,
                             '₴ Баланс: {}\n'
                             '🛒 Покупок: {}\n'
                             '💰 Продаж: {}'.format(
                                 self.user.balance,
                                 db.get_purchases(self.user.id),
                                 db.get_sells(self.user.id)),
                             reply_markup=markup)
        elif pressed == self.buyButton:
            # Switch to another page here.
            self.user.setState('shop')

            if db.get_selling_products():
                bot.send_message(self.user.message.chat.id,
                                 'Товары в продаже:',
                                 parse_mode='HTML',
                                 reply_markup=Page(self.user).getMarkup())
                for product in db.get_selling_products():
                    text = '\n\n🔹 {}\nЦена: {} ₴\nКупить: /buy_{}'.format(
                        product['title'], product['price'],
                        utils.convertInt(product['id']))
                    photos = db.get_sale_app_photos(product['id'])
                    # Only the first photo of the group carries the caption.
                    media_group = [
                        types.InputMediaPhoto(
                            photos[num]['photo'],
                            caption=text if num == 0 else '')
                        for num in range(len(photos))
                    ]
                    bot.send_media_group(self.user.message.chat.id,
                                         media_group)
            else:
                bot.send_message(
                    self.user.message.chat.id,
                    'К сожалению, сейчас ничего нет в продаже. Почему бы не продать что-то?',
                    reply_markup=Page(self.user).getMarkup())

        elif pressed == self.sellButton:
            if db.check_sale_rules(self.user.id) == 1:
                # Switch to another page here.
                self.user.setState('sale')

                # Refresh the page.
                intro = bot.reply_to(
                    self.user.message,
                    "Вы начали создание товара на продажу, если Вы передумали что-либо продавать или ввели неккоректные данные, нажмите кнопку Отмена. После создания заявки на продажу, модераторы проверят её и Ваш товар станет доступен для покупки другим пользователям. Статус обработки заявки можно посмотреть в личном кабинете.",
                    reply_markup=Page(self.user).getMarkup())
                intro.wait()

                bot.send_message(self.user.id,
                                 "Напишите название вашего товара")
            else:
                # Sale rules not accepted yet: show them with a confirm button.
                markup = telebot.types.InlineKeyboardMarkup()
                accept_button = telebot.types.InlineKeyboardButton(
                    text='Принять соглашение',
                    callback_data='sale_confirm_rules')
                markup.add(accept_button)
                bot.send_message(
                    self.user.id,
                    'Перед созданием первого товара Вам нужно ознакомиться с правилами и советами:\n\n'
                    '- Сделайте хорошие фотографии с нескольких ракурсов\n'
                    '- Составьте подробное описание товара\n',
                    reply_markup=markup)
        elif pressed == self.infoButton:
            info_message = '{} \n {}'.format(msg.info_text,
                                             self.user.balance)
            bot.send_message(self.user.id, info_message)
        elif pressed == self.supportButton:
            self.user.setState('support')

            bot.reply_to(
                self.user.message,
                "Все Ваши сообщения, отправленные после этого будут переданы администрации\nДля завершения нажмите на кнопку 'Завершить'",
                reply_markup=Page(self.user).getMarkup())
Example #48
0
 def command(self, event):
     """Write "<nickname>: <text>" to the event's stdout.

     <text> is the first ``spec`` argument with emojis replaced by their
     names; <nickname> is taken from ``event["user"]``.
     """
     text = emoji.demojize(event["spec"][0])
     out = event["stdout"]
     out.write("%s: %s" % (event["user"].nickname, text))
Example #49
0
    async def sc(self, ctx):
        """Render the invoking message as a "superchat" image and post it.

        Deletes the invoking message, picks a random amount and the color
        scheme that goes with it, draws the author's display name, the amount
        and the formatted message text onto a 450px-wide canvas, overlays
        custom-emoji / emoji images and the author's avatar, saves the result
        to ``img/superchat/superchat.png`` under ``self.path`` and sends that
        file to the channel.

        Args:
            ctx: Command context; this method reads ``ctx.message`` and
                ``ctx.guild`` and calls ``ctx.send``.
        """
        user = ctx.message.author
        # Drop the first four characters of the message
        # (presumably the command prefix, e.g. "!sc " -- TODO confirm).
        msg = ctx.message.clean_content[4:]

        await ctx.message.delete()

        # Generate a random amount of money
        money = self._get_random_money()
        # Colors corresponding to the amount
        colors = self._get_money_colors(money)

        # Create and draw the rectangles
        main_color = colors['main_color']
        back_color = colors['back_color']
        name_color = colors['name_color']
        text_color = colors['text_color']

        # format_msg is the text that gets drawn; emoji_list is consumed below,
        # one entry per '&%' placeholder found in format_msg.
        # (36 is presumably the wrap width -- TODO confirm against _format_text.)
        format_msg, emoji_list = self._format_text(36, msg)
        stamp_list = await self._get_custom_stamp_list(ctx.guild, msg)

        # NOTE(review): line breaks are counted with os.linesep here but the
        # layout loop below resets on '\n'; these differ on platforms where
        # os.linesep is '\r\n' -- confirm what _format_text emits.
        lines = format_msg.count(os.linesep)
        text_height = 22
        font_size = 20
        # Canvas grows by one text row per counted line break.
        height = 150 + lines * text_height

        im = Image.new("RGBA", (450, height), tuple(main_color))
        draw = ImageDraw.Draw(im)
        draw.rectangle((0, 100, 450, height), fill=tuple(back_color))

        # Composite the text
        name_font = ImageFont.truetype(
            str(self.path / "font/migu-1m-regular.ttf"), font_size)
        # Only the user name uses a slightly lighter color
        draw.multiline_text((110, 20),
                            user.display_name,
                            fill=tuple(name_color),
                            font=name_font)
        del name_font

        text_font = ImageFont.truetype(
            str(self.path / "font/migu-1m-bold.ttf"), font_size)
        draw.multiline_text((110, 50),
                            f"¥ {'{:,}'.format(money)}",
                            fill=tuple(text_color),
                            font=text_font)

        draw.multiline_text((20, 115),
                            format_msg,
                            fill=tuple(text_color),
                            font=text_font)

        # Walk the drawn text character by character, tracking the pixel
        # offset of each glyph so placeholder pairs can be overpainted.
        offset = [0, 0]
        prev_str = ''
        for i, s in enumerate(format_msg):
            # Fullwidth / wide / ambiguous characters advance a full font
            # width; everything else advances half of it.
            if unicodedata.east_asian_width(s) in 'FWA':
                offset[0] += font_size
            else:
                offset[0] += int(font_size / 2)

            # Replace custom emoji and emoji with images
            # ('&@' marks a custom emoji, '&%' marks an emoji)
            if s in ['@', '%'] and prev_str == '&':
                # The offset has already been advanced past both placeholder
                # characters, so step back one font_size to their left edge.
                pos = [20 + offset[0] - font_size, 115 + offset[1]]
                # Paint over the dummy (placeholder) characters
                draw.rectangle(
                    (pos[0], pos[1], pos[0] + font_size, pos[1] + font_size),
                    fill=tuple(back_color))
                # Custom emoji case
                if s == '@':
                    data = io.BytesIO(await stamp_list.pop(0).read())
                    stamp_img = Image.open(data).convert('RGBA').resize(
                        (20, 20), Image.BICUBIC)
                    # Use the alpha channel as the paste mask.
                    im.paste(stamp_img, (pos[0], pos[1]), stamp_img.split()[3])
                # Emoji case
                elif s == '%':
                    if len(emoji_list) > 0:
                        # Strip the surrounding ':' delimiters to get the name.
                        emoji_str = emoji.demojize(emoji_list.pop(0))[1:-1]
                        # Some emoji are not converted, so check just in case
                        # (only ⛩ as of 2020/10/4)
                        emoji_img_path = self.path / f'img/emoji/{emoji_str}.png'
                        if os.path.isfile(emoji_img_path):
                            emoji_img = Image.open(emoji_img_path).convert(
                                'RGBA')
                            im.paste(emoji_img, (pos[0], pos[1]),
                                     emoji_img.split()[3])

            prev_str = s

            # A newline returns to the left margin and moves down one row.
            if s == '\n':
                offset[0] = 0
                offset[1] += text_height

        # Fetch the user's avatar thumbnail and convert it to an Image
        data = io.BytesIO(await user.avatar_url.read())
        thum = Image.open(data).convert('RGBA')
        del data
        thum = thum.resize((60, 60), Image.BICUBIC)
        # Composite the images (the avatar is pasted through a grayscale mask)
        mask = Image.open(self.path /
                          "img/superchat/mask_circle.jpg").convert('L')
        im.paste(thum, (25, 20), mask.resize((60, 60), Image.HAMMING))

        # NOTE(review): every invocation writes the same output path, so
        # overlapping invocations could overwrite each other's file -- confirm
        # whether that can happen in practice.
        im.save(self.path / "img/superchat/superchat.png")
        del im

        await ctx.send(file=discord.File(self.path /
                                         "img/superchat/superchat.png"))
def clean_tweets_opt(tweet, lower=True, demoji=True, punc=True, stopwords=[],
                     num=False, url=True, stem=False, repeatedChar=False,
                     users=False):
    """Tokenize a tweet and apply the selected normalization steps.

    Steps run in this fixed order: URL collapsing, tokenization, lowercasing,
    emoji-to-name conversion, punctuation removal, stop-word removal, digit
    replacement, stemming, repeated-character squeezing, user-mention
    replacement, and finally removal of empty tokens.

    Args:
        tweet: Raw tweet text.
        lower: Lowercase every token. Because URL collapsing runs first, the
            ``URL`` placeholder is lowercased as well.
        demoji: Replace emoji in each token with their text names.
        punc: Drop tokens for which ``token in punctuation`` is true
            (presumably ``string.punctuation`` -- verify against the import).
        stopwords: Collection of tokens to drop. The default list is only
            read, never mutated.
        num: Replace every run of digits inside a token with ``NUM``.
        url: Replace everything from ``htt`` up to the next space with ``URL``
            before tokenizing.
        stem: Stem every token with the module-level ``stemmer``.
        repeatedChar: Squeeze any character repeated three or more times down
            to exactly three.
        users: Replace tokens starting with ``@`` by ``USER``.

    Returns:
        The list of cleaned, non-empty tokens.
    """
    # URLs have to be collapsed before tokenization.
    if url:
        tweet = re.sub('htt[^ ]*', 'URL', tweet)

    cleanWords = twtokenizer.tokenize(tweet)

    if lower:
        cleanWords = [word.lower() for word in cleanWords]

    if demoji:
        cleanWords = [emoji.demojize(word) for word in cleanWords]

    # Only removes a token when the whole token is found in `punctuation`.
    if punc:
        cleanWords = [word for word in cleanWords if word not in punctuation]

    # The stop-word list is supplied by the caller; empty by default.
    cleanWords = [word for word in cleanWords if word not in stopwords]

    if num:
        # Raw string: "\d" in a plain literal is an invalid escape sequence.
        cleanWords = [re.sub(r"\d+", "NUM", word) for word in cleanWords]

    if stem:
        cleanWords = [stemmer.stem(word) for word in cleanWords]

    # Keep three repeats so elongated words stay distinguishable from the
    # normal spelling while still being standardized.
    if repeatedChar:
        cleanWords = [
            re.sub(r'(.)\1{2,}', r'\1\1\1', word) for word in cleanWords
        ]

    if users:
        # startswith() is safe on an empty token, unlike word[0].
        cleanWords = [
            'USER' if word.startswith('@') else word for word in cleanWords
        ]

    # Non-optional: drop empty tokens.
    return [word for word in cleanWords if word != '']
Example #51
0
def test_demojize_name_only():
    """Every known emoji name must survive an emojize -> demojize round trip."""
    for name in emoji.EMOJI_UNICODE.keys():
        as_symbol = emoji.emojize(name, False)
        restored = emoji.demojize(as_symbol)
        assert name == restored, "%s != %s" % (name, restored)
Example #52
0
def test_demojize_complicated_string():
    """A string mixing emoji names, modifiers and plain text must round-trip."""
    original = u"testing :baby::emoji_modifier_fitzpatrick_type-3: with :eyes: :eyes::eyes: modifiers :baby::emoji_modifier_fitzpatrick_type-5: to symbols ヒㇿ"
    restored = emoji.demojize(emoji.emojize(original))
    assert original == restored, "%s != %s" % (original, restored)
Example #53
0
def test_shortcut_translation():
    """Each shortcut must be translated to the name the shortcut table maps it to."""
    table = emoji.shortcuts.SHORTCUTS
    for shortcut in table.keys():
        translated = emoji.demojize(shortcut, use_shortcuts=True)
        # The shortcut must actually have been rewritten...
        assert translated != shortcut
        # ...and rewritten to exactly the table entry.
        wanted = table[shortcut]
        assert wanted == translated, "%s != %s" % (wanted, translated)
Example #54
0
def test_smile_emoji():
    """Emojize(alias) -> demojize(shortcuts) -> emojize must equal emojize(alias)."""
    txt = u'(<some text> :smile:)'
    with_symbols = emoji.emojize(txt, use_aliases=True)
    back_to_text = emoji.demojize(with_symbols, use_shortcuts=True)
    round_tripped = emoji.emojize(back_to_text)
    assert round_tripped == emoji.emojize(txt, use_aliases=True)
Example #55
0
def read_data(X_train, X_test, Y_path, sentence_txt, bigdict, word2vec_model):
    """Build index-encoded training sequences and labels.

    The function
      1. obtains a segmented-sentence file (``sentence_txt``), creating it
         from the ``comment`` column of both CSVs when loading fails,
      2. loads a word2vec model from ``word2vec_model`` or trains and saves
         one when loading fails,
      3. builds an embedding matrix (row 0 is left as zeros) and stores a
         frozen ``Embedding`` layer in the module-level ``embedding_layer``,
      4. encodes the first ``TRAIN_NUM`` lines of the sentence file as lists
         of vocabulary indices (0 for unknown words).

    Args:
        X_train: Path of the training CSV; must have a ``comment`` column.
        X_test: Path of the test CSV; must have a ``comment`` column.
        Y_path: Path of the label CSV; must have a ``label`` column.
        sentence_txt: Path of the cached, space-separated sentence file.
        bigdict: Dictionary file passed to ``jieba.set_dictionary``.
        word2vec_model: Path used to load / save the word2vec model.

    Returns:
        ``(X, Y)`` where ``X`` is ``np.array`` of the index lists and ``Y``
        holds the first ``TRAIN_NUM`` labels.
    """
    TRAIN_NUM = 119018
    try:
        print('Loading Sentences')
        sentences = word2vec.LineSentence(sentence_txt)
    except Exception:
        # Was a bare `except:`; narrowed so Ctrl-C / SystemExit are not
        # mistaken for "cache missing".
        print('Reading data to sentences')
        data = pd.read_csv(X_train)
        X_data = data['comment'].values
        testdata = pd.read_csv(X_test)
        X_testdata = testdata['comment'].values
        print(X_data.shape)  # (12000,)
        print(X_testdata.shape)
        jieba.set_dictionary(bigdict)
        # Training comments first, then test comments, each demojized and
        # segmented into a word list.
        X_words = [
            list(jieba.cut(emoji.demojize(comment), cut_all=False))
            for corpus in (X_data, X_testdata)
            for comment in corpus
        ]

        # NOTE(review): the file is written with the platform default
        # encoding, as before -- confirm it matches what LineSentence expects.
        with open(sentence_txt, "w") as out:
            for sen in X_words:
                # Every word is followed by a single space, then a newline.
                out.write(''.join(word + ' ' for word in sen))
                out.write('\n')
        sentences = word2vec.LineSentence(sentence_txt)

    # word2vec
    try:
        print('Loading word2vec model')
        w2v_model = word2vec.Word2Vec.load(word2vec_model)
    except Exception:
        print('Training word2vec model')
        w2v_model = word2vec.Word2Vec(sentences,
                                      iter=32,
                                      size=128,
                                      min_count=3,
                                      workers=4,
                                      sg=1)
        w2v_model.save(word2vec_model)

    # Index 0 is reserved for unknown words, so real words start at 1.
    embedding_matrix = np.zeros(
        (len(w2v_model.wv.vocab.items()) + 1, w2v_model.vector_size))
    word2idx = {}
    for idx, (word, _) in enumerate(w2v_model.wv.vocab.items(), start=1):
        embedding_matrix[idx] = w2v_model.wv[word]
        word2idx[word] = idx

    global embedding_layer
    embedding_layer = Embedding(input_dim=embedding_matrix.shape[0],
                                output_dim=embedding_matrix.shape[1],
                                weights=[embedding_matrix],
                                trainable=False)

    X_vecs = []
    # The context manager closes the handle, which was previously leaked.
    with open(sentence_txt, "r") as readfile:
        for line in readfile:
            X_vecs.append([word2idx.get(word, 0) for word in line.split()])
            # The file holds train + test sentences; keep only the train part.
            if len(X_vecs) >= TRAIN_NUM:
                break
    X = np.array(X_vecs)
    print(X.shape)

    label = pd.read_csv(Y_path)
    Y_data = label['label'].values
    Y = np.array(Y_data)
    Y = Y[0:TRAIN_NUM]

    return X, Y
Example #56
0
 def getEMOJI(self, text):
     """Replace emoji in ``text`` with ``' TK.EMOJI.<NAME> '`` tokens.

     Emoji are first converted to ``:name:`` form; every ``:name:`` made up
     only of lowercase letters, underscores and hyphens is then rewritten
     with the name uppercased. Names containing other characters (digits,
     for example) are left in ``:name:`` form.

     Args:
         text: Input string.

     Returns:
         The rewritten string.
     """
     def replacement(match):
         return ' TK.EMOJI.' + match.group(1).upper() + ' '

     text = emoji.demojize(text)
     # Raw pattern without the invalid "\:" escapes; it matches the same text.
     return re.sub(r':([a-z_-]+):', replacement, text)
Example #57
0
import re

# Script: segment the training comments with jieba, convert emoji to their
# text names, drop tokens that start with punctuation/whitespace, and train a
# word2vec model on the result.
print("Start cleaning Data")
jieba.load_userdict("./data/dict.txt.big")
# The context manager closes the file even if reading raises.
with open("./data/train_x.csv") as train_file:
    train_x = train_file.readlines()
# Raw string: the escapes are meant for the regex engine, not for Python.
punctuation_search = re.compile(
    r"[\s+\.\!\/_,$%^*(+\"\']+|[+——\>\<!,。??、\-~~@#¥%……&*():]+")
clean_data = []
# `idx` instead of `id`, so the builtin is not shadowed at module level.
for idx in range(len(train_x)):
    # train_x is updated in place: strip the newline, then keep only what
    # follows the first comma (the first CSV field is discarded).
    train_x[idx] = train_x[idx].replace("\n", "")
    train_x[idx] = train_x[idx].split(",", maxsplit=1)[1]
    # Demojize after segmentation, token by token.
    word_list = [emoji.demojize(token) for token in jieba.lcut(train_x[idx])]
    # Keep a token only when the punctuation pattern does not match at its
    # start.
    clean_list = [
        word for word in word_list
        if punctuation_search.match(word, 0) is None
    ]
    if clean_list:
        clean_data.append(clean_list)

print("Start training word2vec")
word2vec_model = gensim.models.Word2Vec(clean_data,
                                        size=200,
                                        window=5,
                                        min_count=5,
                                        workers=3,
                                        iter=30)
def emoji_as_words(emoji_list):
    """Return the text name of every emoji in ``emoji_list``.

    Names are produced by ``emoji.demojize`` with empty delimiters, i.e.
    without the surrounding colons. Order is preserved.
    """
    words = []
    for symbol in emoji_list:
        words.append(emoji.demojize(symbol, delimiters=('', '')))
    return words
Example #59
0
def parse_comment_for_vote(body):
    """Turn a comment into a vote, if possible.

    The comment body is demojized first and the result is handed to
    ``parse_emojis_for_vote``, whose return value is passed through.
    """
    demojized_body = demojize(body)
    return parse_emojis_for_vote(demojized_body)
def preprocessing(text):
    """Normalize a piece of sports-related text for downstream processing.

    The substitutions run strictly in the order written, because later
    patterns rely on earlier ones: links and emoji are handled first, then
    numbers and number-suffixed tokens, then basketball / football
    abbreviations are expanded, boilerplate words are dropped, and finally
    stop words and punctuation tokens are removed with ``nlp``.

    Args:
        text: Raw input string.

    Returns:
        The cleaned text as a single space-joined string.
    """
    # Remove newlines
    text = text.replace("\n", "")
    # Remove links
    text = re.sub(r'http\S+', '', text)
    text = re.sub(r'www\S+', '', text)
    # Replace emoji with their aliases
    text = emoji.demojize(text)
    # Drop the colons before and after an alias
    text = re.sub(r'(:)(.*?)(:)', r' \2 ', text)
    # Replace underscores (aliases made of several words) with spaces
    text = re.sub(r'_', ' ', text)
    # Replace slashes
    text = re.sub(r'/', ' ', text)
    # Replace |
    text = re.sub(r'\|', ' ', text)
    # Strip parentheses but keep their content
    # (raw replacement string: "\g" is an invalid escape in a plain literal)
    text = re.sub(r'(\()([^)]+)(\))', r'\g<2>', text)
    # Replace ']' with '] '
    text = text.replace(']', '] ')
    # Remove standalone numbers
    text = re.sub(r'(?<![a-zA-Z]-)(\b\d+\b)', ' ', text)
    # Remove minutes / millions
    text = re.sub(r'\d+m\b', ' ', text)
    # Remove ordinals
    text = re.sub(r'\d+th\b', ' ', text)
    text = re.sub(r'\d+st\b', ' ', text)
    text = re.sub(r'\d+nd\b', ' ', text)
    text = re.sub(r'\d+rd\b', ' ', text)
    # Remove hours
    text = re.sub(r'\d+h\b', ' ', text)
    text = re.sub(r'\d+am\b', ' ', text)
    text = re.sub(r'(\b\d+h\d+\b)', ' ', text)
    # Remove decades / years
    text = re.sub(r'\d+s\b', ' ', text)
    # Expand number-prefixed statistics
    text = re.sub(r'(\d+)(ppg)', ' point per game', text, flags=re.IGNORECASE)
    text = re.sub(r'(\d+)(pt(s?))', ' point', text, flags=re.IGNORECASE)
    text = re.sub(r'(\d+)(reb(s?))', ' rebound', text, flags=re.IGNORECASE)
    text = re.sub(r'(\d+)(rpg)',
                  ' rebound per game',
                  text,
                  flags=re.IGNORECASE)
    text = re.sub(r'(\d+)(ast(s?))', ' assist', text, flags=re.IGNORECASE)
    text = re.sub(r'(\d+)(apg)', ' assist per game', text, flags=re.IGNORECASE)
    text = re.sub(r'(\d+)(stl(s?))', ' steal', text, flags=re.IGNORECASE)
    text = re.sub(r'(\d+)(spg)', ' steal per game', text, flags=re.IGNORECASE)
    text = re.sub(r'(\d+)(blk(s?))', ' block', text, flags=re.IGNORECASE)
    text = re.sub(r'(\d+)(bpg)', ' block per game', text, flags=re.IGNORECASE)
    text = re.sub(r'(\d+)(OT(s?))', ' overtime', text)
    text = re.sub(r'(\d+)(pm)',
                  ' three-point field goal made',
                  text,
                  flags=re.IGNORECASE)
    text = re.sub(r'(\d+)(pa)',
                  ' three-point field goal attempted',
                  text,
                  flags=re.IGNORECASE)
    text = re.sub(r'(\d+)(P%)', ' three-point field goal', text)
    # Expand TS%. No trailing \b: after '%' a word boundary only exists when
    # a word character follows, so the old pattern missed a standalone "TS%".
    text = re.sub(r'(\bTS%)', 'throw shooting percentage', text)
    # Normalize free-kick
    text = re.sub(r'(\bfree kick(s?)\b)',
                  'free-kick',
                  text,
                  flags=re.IGNORECASE)
    # Normalize free-throw
    text = re.sub(r'(\bfree throw(s?)\b)',
                  'free-throw',
                  text,
                  flags=re.IGNORECASE)
    text = re.sub(r'(\bfreethrown(s?)\b)',
                  'free-throw',
                  text,
                  flags=re.IGNORECASE)
    # Expand throw% (no trailing \b, same reason as TS% above)
    text = re.sub(r'(\bthrow%)', 'throw shooting percentage', text)
    # Normalize field goal% (no trailing \b, same reason as TS% above)
    text = re.sub(r'(\bfield goal%)', 'field goal', text)
    # Expand ref
    text = re.sub(r'(\bref\b)', 'referee', text, flags=re.IGNORECASE)
    # Expand OT
    text = re.sub(r'(\bOT\b)', 'overtime', text)
    # Expand ET
    text = re.sub(r'(\bET\b)', 'extra-time', text)
    # Expand WC
    text = re.sub(r'(\bWC\b)', 'World Cup', text)
    # Expand EPL
    text = re.sub(r'(\bEPL\b)', 'English Premier League', text)
    # Expand PL
    text = re.sub(r'(\bPL\b)', 'Premier League', text)
    # Expand VAR
    text = re.sub(r'(\bVAR\b)', 'Video Assistant Referee', text)
    # Expand UCL
    text = re.sub(r'(\bUCL\b)', 'Uefa Champions League', text)
    # Expand CL
    text = re.sub(r'(\bCL\b)', 'Champions League', text)
    # Expand UEL
    text = re.sub(r'(\bUEL\b)', 'Uefa Europa League', text)
    # Expand EL
    text = re.sub(r'(\bEL\b)', 'Europa League', text)
    # Expand SG
    text = re.sub(r'(\bsg\b)', 'shooting guard', text, flags=re.IGNORECASE)
    # Expand G
    text = re.sub(r'(\bg\b)', 'shooting guard', text, flags=re.IGNORECASE)
    # Expand C
    text = re.sub(r'(\bc\b)', 'center', text, flags=re.IGNORECASE)
    # Expand PF
    text = re.sub(r'(\bpf\b)', 'power forward', text, flags=re.IGNORECASE)
    # Expand SF
    text = re.sub(r'(\bsf\b)', 'small forward', text, flags=re.IGNORECASE)
    # Expand F
    text = re.sub(r'(\bf\b)', 'forward', text, flags=re.IGNORECASE)
    # Expand PPG
    text = re.sub(r'(\bppg\b)', 'point per game', text, flags=re.IGNORECASE)
    # Expand PTS
    text = re.sub(r'(\bpt(s?)\b)', 'point', text, flags=re.IGNORECASE)
    # Expand REBS
    text = re.sub(r'(\breb(s?)\b)', 'rebound', text, flags=re.IGNORECASE)
    # Expand RPG
    text = re.sub(r'(\brpg\b)', 'rebound per game', text, flags=re.IGNORECASE)
    # Expand ASTS
    text = re.sub(r'(\bast(s?)\b)', 'assist', text, flags=re.IGNORECASE)
    text = re.sub(r'(\bassts\b)', 'assist', text, flags=re.IGNORECASE)
    # Expand APG
    text = re.sub(r'(\bapg\b)', 'assist per game', text, flags=re.IGNORECASE)
    # Expand STL
    text = re.sub(r'(\bstl(s?)\b)', 'steal', text, flags=re.IGNORECASE)
    # Expand SPG
    text = re.sub(r'(\bspg(s?)\b)',
                  'steal per game',
                  text,
                  flags=re.IGNORECASE)
    # Expand BLK
    text = re.sub(r'(\bblk(s?)\b)', 'block', text, flags=re.IGNORECASE)
    # Expand BPG. The pattern used to repeat the BLK one, which can never
    # match after the substitution above, so "bpg" was never expanded.
    text = re.sub(r'(\bbpg(s?)\b)',
                  'block per game',
                  text,
                  flags=re.IGNORECASE)
    # Normalize triple-double
    text = re.sub(r'(\btriple double\b)',
                  'triple-double',
                  text,
                  flags=re.IGNORECASE)
    # Normalize double-double
    text = re.sub(r'(\bdouble double\b)',
                  'double-double',
                  text,
                  flags=re.IGNORECASE)
    # Expand FGM
    text = re.sub(r'(\bFGM\b)', 'field goal made', text, flags=re.IGNORECASE)
    # Expand FGA
    text = re.sub(r'(\bFGA\b)',
                  'field goal attempted',
                  text,
                  flags=re.IGNORECASE)
    # Expand FG
    text = re.sub(r'(\bFG\b)', 'field goal', text, flags=re.IGNORECASE)
    # Expand FTM
    text = re.sub(r'(\bFTM\b)', 'free throw made', text, flags=re.IGNORECASE)
    # Expand FTA
    text = re.sub(r'(\bFTA\b)',
                  'free throw attempted',
                  text,
                  flags=re.IGNORECASE)
    # Expand FT
    text = re.sub(r'(\bFT\b)', 'free throw', text, flags=re.IGNORECASE)
    # Remove highlight(s)
    text = re.sub(r'(\bhighlight(s?)\b)', '', text, flags=re.IGNORECASE)
    # Remove (pre/post)(-)(match thread)
    text = re.sub(r'(\bpost\b( ?))?(\bpre\b( ?))?(-?)(\bmatch\b) (\bthread\b)',
                  '',
                  text,
                  flags=re.IGNORECASE)
    # Remove (pre/post)(-)(game thread)
    text = re.sub(r'(\bpost\b( ?))?(\bpre\b( ?))?(-?)(\bgame\b) (\bthread\b)',
                  '',
                  text,
                  flags=re.IGNORECASE)
    # Remove (daily)(discussion)(thread)
    text = re.sub(r'(\bdaily\b( ?))?(\bdiscussion(s)?\b)(( ?)\bthread\b)?',
                  '',
                  text,
                  flags=re.IGNORECASE)
    # Remove breaking
    text = re.sub(r'(\bbreaking\b)', '', text, flags=re.IGNORECASE)
    # Remove free talk friday
    text = re.sub(r'(\bfree talk friday\b)', '', text, flags=re.IGNORECASE)
    # Remove VIDEO
    text = re.sub(r'(\bVIDEO\b)', '', text)
    # Remove +
    text = re.sub(r'(\+)', '', text)
    # Remove currency symbols
    text = text.replace('£', '')
    text = text.replace('$', '')
    text = text.replace('€', '')
    # Remove OC
    text = re.sub(r'(\bOC\b)', '', text)
    # Collapse runs of spaces
    text = re.sub(r' {2,}', ' ', text)
    # Keep only tokens that are neither stop words nor punctuation.
    kept_tokens = [
        token for token in nlp(text)
        if not token.is_stop and not token.is_punct
    ]
    text = " ".join(str(token) for token in kept_tokens)
    # NOTE(review): presumably restores "Serie A" after the "A" was dropped as
    # a stop word; it also rewrites every other occurrence of "Serie" --
    # confirm this is intended.
    text = text.replace('Serie', 'Serie A')
    return text