def remove_url_from_message(message):
    """Strip URLs from a message's text and from its text-URL entities.

    Mutates *message* in place and returns it for convenience.
    """
    message.message = remove_urls(message.message)
    # No formatting entities — nothing more to scrub.
    if message.entities is None:
        return message
    for entity in message.entities:
        if isinstance(entity, MessageEntityTextUrl):
            entity.url = remove_urls(entity.url)
    return message
def __getitem__(self, index):
    """Return a ``(one-hot char tensor, label)`` pair for the sample at *index*.

    The text is lowercased, stripped of hashtags/mentions/URLs, then encoded
    character-by-character in REVERSE order as rows of ``config.identity_mat``
    (characters outside ``config.vocabulary`` are dropped).  The matrix is
    truncated or zero-padded to exactly ``config.max_length`` rows.
    """
    raw = self.text[index]
    cleaned = utils.remove_urls(
        utils.remove_user_mentions(
            utils.remove_hashtags(
                utils.lower(raw))))
    # One identity-matrix row per in-vocabulary character, reversed order.
    rows = [
        config.identity_mat[config.vocabulary.index(ch)]
        for ch in reversed(list(cleaned))
        if ch in config.vocabulary
    ]
    data = np.array(rows, dtype=np.float32)
    n = len(data)
    if n > config.max_length:
        data = data[:config.max_length]
    elif n == 0:
        # No usable characters at all: emit an all-zero block.
        data = np.zeros((config.max_length, config.number_of_characters),
                        dtype=np.float32)
    elif n < config.max_length:
        pad = np.zeros((config.max_length - n, config.number_of_characters),
                       dtype=np.float32)
        data = np.concatenate((data, pad))
    return torch.Tensor(data), self.labels[index]
def create_poemline(self, string):
    """Populate this poem line from *string*.

    Stores the raw text, a cleaned copy (URLs and symbols removed), the
    cleaned line's syllable count, and the rhyme signature of the final word.
    """
    # Keep the untouched original alongside the cleaned version.
    self.originalString = string
    self.cleanString = remove_symbols(remove_urls(string))
    self.syl = line_syl(self.cleanString)
    # Rhyme is computed against the ORIGINAL last word, not the cleaned one.
    last_word = string.split()[-1]
    self.rhymes = rhyme(last_word, self.RHYME_LEVEL)
async def handler_new_message(event):
    """NewMessage event handler: mirror a new message to all target channels.

    Looks up the target channels for ``event.chat_id`` in CHANNEL_MAPPING,
    optionally strips URLs, re-sends the message (polls are re-sent as a
    fresh InputMediaPoll), and records each original->mirror id pair in db.
    """
    # Skip albums — messages sharing a grouped_id are handled elsewhere.
    if hasattr(event, 'grouped_id') and event.grouped_id is not None:
        return
    try:
        logger.debug(f'New message from {event.chat_id}:\n{event.message}')
        targets = CHANNEL_MAPPING.get(event.chat_id)
        if targets is None or len(targets) < 1:
            logger.warning(
                f'NewMessage. No target channel for {event.chat_id}')
            return
        if REMOVE_URLS:
            event.message.message = remove_urls(event.message.message)
            # BUGFIX: entities is None for messages with no formatting;
            # iterating it unguarded raised TypeError (swallowed by the broad
            # except below, silently aborting the mirror).  Guard as in
            # remove_url_from_message.
            if event.message.entities is not None:
                for e in event.message.entities:
                    if isinstance(e, MessageEntityTextUrl):
                        e.url = remove_urls(e.url)
        sent = 0
        for chat in targets:
            mirror_message = None
            if isinstance(event.message.media, MessageMediaPoll):
                # Polls cannot be forwarded verbatim; re-send the poll object.
                mirror_message = await client.send_message(
                    chat, file=InputMediaPoll(poll=event.message.media.poll))
            else:
                mirror_message = await client.send_message(chat, event.message)
            if mirror_message is not None:
                db.insert(
                    MirrorMessage(original_id=event.message.id,
                                  original_channel=event.chat_id,
                                  mirror_id=mirror_message.id,
                                  mirror_channel=chat))
            sent += 1
            # Throttle to dodge flood limits.
            # NOTE(review): time.sleep blocks the event loop; consider
            # `await asyncio.sleep(TIMEOUT_MIRRORING)` — confirm no other
            # handler relies on this serialization.
            if sent > LIMIT_TO_WAIT:
                sent = 0
                time.sleep(TIMEOUT_MIRRORING)
    except Exception as e:
        logger.error(e, exc_info=True)
async def handler_edit_message(event):
    """MessageEdited handler: replay an edit onto the mirrored copy.

    Resolves the mirror id recorded for the original message, fetches the
    mirrored message from TARGET_CHAT, and applies the (optionally
    URL-stripped) new text to it.
    """
    try:
        logger.debug('Edit message')
        record = database.find_by_original_id(event.message.id, event.chat_id)
        if record is None:
            # Nothing was mirrored for this message.
            return
        request = functions.channels.GetMessagesRequest(
            channel=TARGET_CHAT, id=[record['mirror_id']])
        result = await client(request)
        message_to_edit = result.messages[0]
        new_text = event.message.message
        if REMOVE_URLS:
            new_text = remove_urls(new_text)
            event.message.message = new_text
        await client.edit_message(message_to_edit, new_text)
    except Exception as e:
        logger.error(e, exc_info=True)
async def handler_new_message(event):
    """NewMessage handler: copy the incoming message to TARGET_CHAT.

    Optionally strips URLs, re-sends the message (polls as a fresh
    InputMediaPoll), and records the original->mirror id mapping.
    """
    try:
        logger.debug(f'New message:\n{event.message}')
        if REMOVE_URLS:
            event.message.message = remove_urls(event.message.message)
        media = event.message.media
        if isinstance(media, MessageMediaPoll):
            # Polls cannot be forwarded verbatim; re-send the poll object.
            mirror_id = await client.send_message(
                TARGET_CHAT, file=InputMediaPoll(poll=media.poll))
        else:
            mirror_id = await client.send_message(TARGET_CHAT, event.message)
        database.insert({
            'original_id': event.message.id,
            'mirror_id': mirror_id.id,
            'original_channel': event.chat_id,
        })
    except Exception as e:
        logger.error(e, exc_info=True)
async def handler_edit_message(event):
    """MessageEdited event handler: forward an edit to every mirror copy.

    Looks up all mirror records for the edited message and applies the
    (optionally URL-stripped) new text to each, throttling between batches.
    """
    try:
        logger.debug(f'Edit message {event.message.id} from {event.chat_id}')
        targets = db.find_by_original_id(event.message.id, event.chat_id)
        if targets is None or len(targets) < 1:
            logger.warning(
                f'MessageEdited. No target channel for {event.chat_id}')
            return
        new_text = event.message.message
        if REMOVE_URLS:
            new_text = remove_urls(new_text)
            event.message.message = new_text
        throttle = 0
        for target in targets:
            await client.edit_message(target.mirror_channel, target.mirror_id,
                                      new_text)
            throttle += 1
            # NOTE(review): time.sleep blocks the event loop; consider
            # `await asyncio.sleep(TIMEOUT_MIRRORING)`.
            if throttle > LIMIT_TO_WAIT:
                throttle = 0
                time.sleep(TIMEOUT_MIRRORING)
    except Exception as e:
        logger.error(e, exc_info=True)
def __getitem__(self, item):
    """Return one tokenized training example as a dict of tensors.

    Cleans the text (URLs removed, emojis converted, whitespace normalized),
    encodes it to a fixed length with the stored tokenizer, and shifts the
    label by +1 so the loss only sees non-negative class indices.
    """
    text = str(self.texts[item])
    text = remove_urls(text)
    text = convert_emojis(text)
    text = normalize_whitespaces(text)
    label = self.labels[item] + 1  # cause of loss limitation
    encoding = self.tokenizer.encode_plus(
        text,
        add_special_tokens=True,
        max_length=self.max_len,
        return_token_type_ids=False,
        # pad_to_max_length=True is deprecated in transformers;
        # padding='max_length' is the supported equivalent.
        padding='max_length',
        return_attention_mask=True,
        return_tensors='pt',
        truncation=True)
    return {
        'text': text,
        'input_ids': encoding['input_ids'].flatten(),
        'attention_mask': encoding['attention_mask'].flatten(),
        'targets': torch.tensor(label, dtype=torch.long)
    }
config = utils.set_config epoch = config.epoch model = NNLM_Model(config = config) model.train() if model.use_gpu: model.to("cuda") else: model.to("cpu") loss = nn.CrossEntropyLoss() optimizer = optim.Adam(model.parameters(), lr = model.lr) # load data data = utils.load_data("./data/Political-media-DFE.csv") data = utils.remove_urls(data) train_data = data[:100] test_data = data[100:120] test_data_list = utils.get_test_token(test_data) word2idx, idx2word, length, train_data_list = utils.get_unique_word(train_data) utils.mkdir_folder(config.save_model) # train epoch for ep in range(epoch): data_generator = utils.make_batch(train_data_list, word2idx, window_size=config.window_size, batch_size=config.batch_size, if_gpu=config.use_gpu) test_data_generator = utils.make_batch(test_data_list, word2idx,