Exemple #1
0
def remove_url_from_message(message):
    """Strip URLs from a message's text and from its text-URL entities.

    The message is modified in place: ``message.message`` is passed through
    ``remove_urls`` and, when ``message.entities`` is not ``None``, so is the
    ``url`` of every ``MessageEntityTextUrl`` entity.

    Returns:
        The same ``message`` object that was passed in.
    """
    message.message = remove_urls(message.message)

    entities = message.entities
    if entities is None:
        # Nothing else to clean.
        return message

    for entity in entities:
        if isinstance(entity, MessageEntityTextUrl):
            entity.url = remove_urls(entity.url)
    return message
Exemple #2
0
    def __getitem__(self, index):
        """Return the encoded text and label stored at ``index``.

        The text is lower-cased and stripped of hashtags, user mentions and
        URLs. Its characters are then visited in reverse order; each one found
        in ``config.vocabulary`` is replaced by the matching row of
        ``config.identity_mat`` (presumably a one-hot vector - confirm against
        the config module). The result is truncated or zero-padded to exactly
        ``config.max_length`` rows.

        Returns:
            A ``(data, label)`` tuple where ``data`` is a ``torch.Tensor``
            built from the float32 array and ``label`` is
            ``self.labels[index]``.
        """
        cleaned = utils.lower(self.text[index])
        cleaned = utils.remove_hashtags(cleaned)
        cleaned = utils.remove_user_mentions(cleaned)
        cleaned = utils.remove_urls(cleaned)

        # Encode known characters only, walking the text back to front.
        rows = []
        for char in reversed(list(cleaned)):
            if char in config.vocabulary:
                rows.append(config.identity_mat[config.vocabulary.index(char)])
        data = np.array(rows, dtype=np.float32)

        n_rows = len(data)
        if n_rows > config.max_length:
            # Too long: keep the first max_length rows of the reversed text.
            data = data[:config.max_length]
        elif n_rows == 0:
            # No known characters at all: the sample is entirely padding.
            data = np.zeros((config.max_length, config.number_of_characters),
                            dtype=np.float32)
        elif n_rows < config.max_length:
            # Too short: append zero rows up to max_length.
            padding = np.zeros((config.max_length - n_rows,
                                config.number_of_characters),
                               dtype=np.float32)
            data = np.concatenate((data, padding))

        label = self.labels[index]
        return torch.Tensor(data), label
Exemple #3
0
 def create_poemline(self, string):
     """Populate this poem line's attributes from a raw text string.

     Sets ``originalString``, ``cleanString``, ``syl`` and ``rhymes``.
     Raises ``IndexError`` when ``string`` contains no whitespace-separated
     words, because the last word is taken from ``string.split()``.
     """
     # keep the untouched input
     self.originalString = string
     # strip urls first, then symbols
     without_urls = remove_urls(string)
     self.cleanString = remove_symbols(without_urls)
     # syllable count is computed on the cleaned text
     self.syl = line_syl(self.cleanString)
     # rhymes are looked up for the last word of the raw (uncleaned) input
     words = string.split()
     last_word = words[-1]
     self.rhymes = rhyme(last_word, self.RHYME_LEVEL)
Exemple #4
0
async def handler_new_message(event):
    """NewMessage event handler.

    Mirrors the incoming message to every target channel configured for the
    source chat in ``CHANNEL_MAPPING`` and stores one ``MirrorMessage`` row
    per mirrored copy. Events with a non-``None`` ``grouped_id`` (albums) are
    skipped. When ``REMOVE_URLS`` is set, URLs are stripped from the message
    text and from its ``MessageEntityTextUrl`` entities before sending.

    Any exception raised while handling the event is logged and swallowed,
    so a failing message does not propagate out of the handler.
    """
    # Imported locally so the block does not depend on a module-level import.
    import asyncio

    # skip if Album
    if getattr(event, 'grouped_id', None) is not None:
        return
    try:
        logger.debug(f'New message from {event.chat_id}:\n{event.message}')
        targets = CHANNEL_MAPPING.get(event.chat_id)
        if targets is None or len(targets) < 1:
            logger.warning(
                f'NewMessage. No target channel for {event.chat_id}')
            return
        if REMOVE_URLS:
            event.message.message = remove_urls(event.message.message)
            # `entities` may be None (e.g. a message without any formatting
            # entities); iterating None would raise TypeError and the message
            # would never be mirrored.
            for entity in event.message.entities or ():
                if isinstance(entity, MessageEntityTextUrl):
                    entity.url = remove_urls(entity.url)
        sent = 0
        for chat in targets:
            if isinstance(event.message.media, MessageMediaPoll):
                # Polls are re-sent as a fresh InputMediaPoll built from the
                # original poll instead of passing the message through.
                mirror_message = await client.send_message(
                    chat, file=InputMediaPoll(poll=event.message.media.poll))
            else:
                mirror_message = await client.send_message(chat, event.message)

            if mirror_message is not None:
                db.insert(
                    MirrorMessage(original_id=event.message.id,
                                  original_channel=event.chat_id,
                                  mirror_id=mirror_message.id,
                                  mirror_channel=chat))
            sent += 1
            if sent > LIMIT_TO_WAIT:
                sent = 0
                # Pause between bursts without blocking the event loop:
                # time.sleep() here would stall every other coroutine.
                await asyncio.sleep(TIMEOUT_MIRRORING)

    except Exception as e:
        logger.error(e, exc_info=True)
Exemple #5
0
async def handler_edit_message(event):
    """Propagate an edit of an original message to its mirrored copy.

    Looks up the mirror record for the edited message; if there is none the
    edit is ignored. Otherwise the mirrored message is fetched from
    ``TARGET_CHAT`` and its text is replaced with the edited text (with URLs
    removed when ``REMOVE_URLS`` is set). Exceptions are logged and swallowed.
    """
    try:
        logger.debug('Edit message')
        edited = event.message
        record = database.find_by_original_id(edited.id, event.chat_id)
        if record is None:
            return

        mirror_id = record['mirror_id']
        request = functions.channels.GetMessagesRequest(channel=TARGET_CHAT,
                                                        id=[mirror_id])
        response = await client(request)
        # Only one id was requested, so the first message is the mirror.
        target = response.messages[0]

        if REMOVE_URLS:
            edited.message = remove_urls(edited.message)
        await client.edit_message(target, edited.message)
    except Exception as e:
        logger.error(e, exc_info=True)
Exemple #6
0
async def handler_new_message(event):
    """Mirror a new message to ``TARGET_CHAT`` and record the id mapping.

    When ``REMOVE_URLS`` is set, URLs are stripped from the message text
    first. Poll messages are re-sent as an ``InputMediaPoll`` built from the
    original poll; everything else is sent as the message itself. The
    original id, mirror id and original channel are then inserted into the
    database. Exceptions are logged and swallowed.
    """
    try:
        logger.debug(f'New message:\n{event.message}')
        if REMOVE_URLS:
            event.message.message = remove_urls(event.message.message)

        if not isinstance(event.message.media, MessageMediaPoll):
            mirrored = await client.send_message(TARGET_CHAT, event.message)
        else:
            poll_media = InputMediaPoll(poll=event.message.media.poll)
            mirrored = await client.send_message(TARGET_CHAT, file=poll_media)

        record = {
            'original_id': event.message.id,
            'mirror_id': mirrored.id,
            'original_channel': event.chat_id
        }
        database.insert(record)
    except Exception as e:
        logger.error(e, exc_info=True)
Exemple #7
0
async def handler_edit_message(event):
    """MessageEdited event handler.

    Finds every mirrored copy of the edited message via
    ``db.find_by_original_id`` and replaces each copy's text with the edited
    text (with URLs removed when ``REMOVE_URLS`` is set). After more than
    ``LIMIT_TO_WAIT`` consecutive edits the handler pauses for
    ``TIMEOUT_MIRRORING`` seconds. Exceptions are logged and swallowed.
    """
    # Imported locally so the block does not depend on a module-level import.
    import asyncio

    try:
        logger.debug(f'Edit message {event.message.id} from {event.chat_id}')
        targets = db.find_by_original_id(event.message.id, event.chat_id)
        if targets is None or len(targets) < 1:
            logger.warning(
                f'MessageEdited. No target channel for {event.chat_id}')
            return
        if REMOVE_URLS:
            event.message.message = remove_urls(event.message.message)
        sent = 0
        for chat in targets:
            await client.edit_message(chat.mirror_channel, chat.mirror_id,
                                      event.message.message)
            sent += 1
            if sent > LIMIT_TO_WAIT:
                sent = 0
                # Pause between bursts without blocking the event loop:
                # time.sleep() here would stall every other coroutine.
                await asyncio.sleep(TIMEOUT_MIRRORING)
    except Exception as e:
        logger.error(e, exc_info=True)
Exemple #8
0
    def __getitem__(self, item):
        """Return the tokenized sample at position ``item``.

        The text is converted to ``str``, then passed through
        ``remove_urls``, ``convert_emojis`` and ``normalize_whitespaces``
        before being encoded with ``self.tokenizer`` to a fixed length of
        ``self.max_len`` tokens (padded and truncated).

        Returns:
            A dict with the cleaned ``text``, the flattened ``input_ids`` and
            ``attention_mask`` tensors, and ``targets`` as a ``torch.long``
            tensor holding the stored label plus one.
        """
        text = str(self.texts[item])
        text = remove_urls(text)
        text = convert_emojis(text)
        text = normalize_whitespaces(text)
        # Labels are shifted by +1 (original note: "cause of loss
        # limitation") - presumably so they are non-negative; confirm.
        label = self.labels[item] + 1

        encoding = self.tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            max_length=self.max_len,
            return_token_type_ids=False,
            # `padding='max_length'` replaces the deprecated
            # `pad_to_max_length=True`; it belongs to the same tokenizer API
            # generation as the `truncation` argument used below (assumes a
            # Hugging Face tokenizer - confirm).
            padding='max_length',
            return_attention_mask=True,
            return_tensors='pt',
            truncation=True)

        return {
            'text': text,
            # return_tensors='pt' yields a leading batch axis of size 1;
            # flatten() drops it.
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'targets': torch.tensor(label, dtype=torch.long)
        }
Exemple #9
0
    config = utils.set_config
    epoch = config.epoch

    model = NNLM_Model(config = config)
    model.train()
    if model.use_gpu:
        model.to("cuda")
    else:
        model.to("cpu")

    loss = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr = model.lr)

    # load data
    data = utils.load_data("./data/Political-media-DFE.csv")
    data = utils.remove_urls(data)
    train_data = data[:100]
    test_data = data[100:120]
    test_data_list = utils.get_test_token(test_data)

    word2idx, idx2word, length, train_data_list = utils.get_unique_word(train_data)

    utils.mkdir_folder(config.save_model)

    # train epoch
    for ep in range(epoch):
        data_generator = utils.make_batch(train_data_list, word2idx,
                                          window_size=config.window_size,
                                          batch_size=config.batch_size,
                                          if_gpu=config.use_gpu)
        test_data_generator = utils.make_batch(test_data_list, word2idx,