Example #1
def extract_ngrams(user: UserModel, sentences: List[SentenceModel], size: int, language: str) -> DataFrame:
    cipher = load_cipher(user)
    pre_processed_sentences = [
        cipher.decrypt(sentence.pre_processed_content.encode()).decode()
        for sentence in sentences
    ]
    ngrams = generate_ngrams(pre_processed_sentences, size, language)
    return ngrams
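
Every example on this page obtains a cipher with load_cipher(user) and round-trips str values through encrypt()/decrypt(), with bytes in between. load_cipher itself is not shown here; below is a minimal sketch of a compatible implementation, assuming a Fernet symmetric key stored on the user model (the cryptography dependency and the encryption_key attribute name are assumptions, not taken from these examples).

# Hypothetical sketch only; the real load_cipher is not part of these examples.
from cryptography.fernet import Fernet


def load_cipher(user) -> Fernet:
    # Fernet.encrypt()/decrypt() accept and return bytes, which matches the
    # .encode()/.decode() round-trips seen throughout the examples.
    return Fernet(user.encryption_key)  # `encryption_key` is an assumed attribute name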
Example #2
def extract_ner(user: UserModel, sentences: List[SentenceModel],
                language: str) -> DataFrame:
    cipher = load_cipher(user)
    pre_processed_sentences = [
        cipher.decrypt(sentence.pre_processed_content.encode()).decode()
        for sentence in sentences
    ]
    ner_sentences = extract_ner_from_sentences(pre_processed_sentences,
                                               language)
    return ner_sentences
Example #3
def get_wordcloud(self) -> WordcloudModel:
    wcs = get_wordclouds_from_datafile(self.datafile)
    wc: WordcloudModel = wcs.first()
    if wc:
        private_cipher = load_cipher(self.user)
        wc.base64_image = private_cipher.decrypt(
            wc.base64_image.encode()).decode()
        return wc
    else:
        raise FileNotFoundError("Ooops, nada por aqui ainda....")
Example #4
def create_base64_wordcloud(user: UserModel, sentences: List[SentenceModel],
                            language: str) -> bytes:
    cipher = load_cipher(user)
    pre_processed_sentences = [
        cipher.decrypt(sentence.pre_processed_content.encode()).decode()
        for sentence in sentences
    ]
    wordcloud = generate_wordcloud(pre_processed_sentences, language)
    image = wordcloud.to_image()
    buffer = BytesIO()
    image.save(buffer, "JPEG")
    image_base64 = base64.b64encode(buffer.getvalue())
    return image_base64
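
create_base64_wordcloud returns the JPEG rendering as base64-encoded bytes, so turning it back into an image file is a plain base64 decode. A hypothetical caller (user, sentences and the "english" language value are placeholders):

import base64

# Hypothetical usage; `user`, `sentences` and the language value are placeholders.
image_b64 = create_base64_wordcloud(user, sentences, "english")
with open("wordcloud.jpg", "wb") as fp:
    fp.write(base64.b64decode(image_b64))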
Example #5
def list_sentences_from_datafile(
        datafile: DataFileModel, skip: int,
        limit: int) -> Tuple[List[SentenceModel], int]:
    sentences = SentenceModel.objects(datafile=datafile)
    total = sentences.count()
    sentences_pag = sentences.skip(skip).limit(limit)
    cipher = load_cipher(datafile.owner)
    for s in sentences_pag:
        s.content = cipher.decrypt(s.content.encode()).decode()
        s.pre_processed_content = cipher.decrypt(
            s.pre_processed_content.encode()).decode()

    return sentences_pag, total
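
list_sentences_from_datafile exposes offset pagination through skip and limit, and returns the decrypted page together with the total count. A hypothetical page-based wrapper on top of it (the page size is an arbitrary choice, not taken from the examples):

PAGE_SIZE = 50  # arbitrary page size


def list_sentences_page(datafile: DataFileModel, page: int):
    # page 0 starts at offset 0, page 1 at offset PAGE_SIZE, and so on
    sentences, total = list_sentences_from_datafile(
        datafile, skip=page * PAGE_SIZE, limit=PAGE_SIZE)
    return sentences, total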
Example #6
def process_task(save_sentence_task: SaveSentenceTaskModel) -> bool:
    # pre-process the sentence
    cipher = load_cipher(save_sentence_task.owner)

    try:
        preprocessed_sentence = preprocess_sentence(
            cipher.decrypt(save_sentence_task.content.encode()).decode(),
            save_sentence_task.datafile.language)

    except Exception as e:
        save_sentence_task.status = "error"
        save_sentence_task.error = "Erro ao preprocessar a sentenca"
        save_sentence_task.save()
        logger.error("Erro ao preprocessar a sentenca",
                     exc_info=True,
                     extra={"received_args": save_sentence_task.to_mongo()})
        return False

    # save the sentence

    try:
        schema = SentenceSchema()
        sentence: SentenceModel = schema.load({
            "datafile": save_sentence_task.datafile,
            "index": save_sentence_task.index,
            "content": save_sentence_task.content,
            "pre_processed_content": cipher.encrypt(
                preprocessed_sentence.encode()).decode()
        })
        sentence.save()
    except NotUniqueError:
        # the sentence was already saved (unique constraint hit); nothing to do
        pass

    except Exception as e:
        save_sentence_task.status = "error"
        save_sentence_task.error = "Erro ao salvar a sentença"
        save_sentence_task.save()
        logger.error("Erro ao salvar a sentença",
                     exc_info=True,
                     extra={"received_args": save_sentence_task.to_mongo()})
        return False

    # Update the task status
    save_sentence_task.progress = 1
    save_sentence_task.status = "success"
    save_sentence_task.save()

    return True
Example #7
def import_sentences_from_df(
        df: DataFrame, datafile: DataFileModel,
        datafile_import_task: DataFileUploadTaskModel) -> None:
    logger.info("Importando sentenças",
                extra={"received_args": {"datafile": datafile.id}})

    user = datafile.owner
    cipher = load_cipher(user)

    datafile_import_task.status = "queued"
    datafile_import_task.save()
    text_column = datafile.text_column
    try:

        schema = SentenceSaveTaskSchema()
        producer = RabbitProducer("NLEaser.sentence_import")
        for index, row in df.iterrows():
            sentence_import_task: SaveSentenceTaskModel = schema.load({
                "owner": str(datafile.owner.id),
                "datafile": str(datafile.id),
                "parent": str(datafile_import_task.id),
                "total": 1,
                "content": cipher.encrypt(
                    str(row[text_column]).encode()).decode(),
                "index": index
            })
            sentence_import_task.save()
            producer.send_message(
                json.dumps({"task": str(sentence_import_task.id)}))
    except Exception as e:
        datafile_import_task.status = "error"
        datafile_import_task.error = "Erro desconhecido ao importar as sentenças"
        datafile_import_task.save()
        logger.error("Erro ao importar sentenças do dataframe",
                     exc_info=True,
                     extra={
                         "received_args": {
                             "datafile": datafile.id,
                             "upload": datafile_import_task.id,
                             "text_column": text_column
                         }
                     })
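
import_sentences_from_df publishes one message per row to the NLEaser.sentence_import queue, carrying only the SaveSentenceTaskModel id as JSON. A hypothetical consumer-side callback that feeds those messages into process_task from Example #6 (the callback signature is assumed; the objects(...) lookup mirrors the mongoengine-style queries used in the other examples):

import json


def on_sentence_import_message(body: bytes) -> None:
    # Hypothetical consumer: look up the task by the id carried in the
    # message and hand it to process_task (Example #6).
    task_id = json.loads(body)["task"]
    task = SaveSentenceTaskModel.objects(id=task_id).first()
    if task is not None:
        process_task(task)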
Example #8
def process_task(wordcloud_create_task: WordcloudCreateTaskModel) -> bool:
    # Retrieve the sentences
    wordcloud_create_task.status = "in_progress"
    wordcloud_create_task.total = 3
    wordcloud_create_task.progress = 1
    wordcloud_create_task.save()
    try:
        sentences: List[SentenceModel] = SentenceModel.objects(
            datafile=wordcloud_create_task.datafile).all()
    except Exception as e:
        wordcloud_create_task.status = "error"
        wordcloud_create_task.error = "Erro ao importar as sentenças desse arquivo"
        wordcloud_create_task.save()
        logger.error(wordcloud_create_task.error,
                     exc_info=True,
                     extra={"received_args": wordcloud_create_task.to_mongo()})
        return False

    # generate the wordcloud in base64

    try:
        base64_image = create_base64_wordcloud(
            wordcloud_create_task.owner, sentences,
            wordcloud_create_task.datafile.language)
        wordcloud_create_task.progress += 1
        wordcloud_create_task.save()
    except Exception as e:
        wordcloud_create_task.status = "error"
        wordcloud_create_task.error = "Erro ao gerar o wordcloud em base64"
        wordcloud_create_task.save()
        logger.error(wordcloud_create_task.error,
                     exc_info=True,
                     extra={"received_args": wordcloud_create_task.to_mongo()})
        return False

    # Save the wordcloud

    try:
        cipher = load_cipher(wordcloud_create_task.owner)
        schema = WordcloudSchema()
        model: WordcloudModel = schema.load({
            "datafile": wordcloud_create_task.datafile,
            "base64_image": cipher.encrypt(base64_image).decode()
        })
        model.save()
        wordcloud_create_task.progress += 1
        wordcloud_create_task.save()

    except Exception as e:
        wordcloud_create_task.status = "error"
        wordcloud_create_task.error = "Erro ao salvar o WordCloud"
        wordcloud_create_task.save()
        logger.error(wordcloud_create_task.error,
                     exc_info=True,
                     extra={"received_args": wordcloud_create_task.to_mongo()})
        return False
    wordcloud_create_task.status = "success"
    wordcloud_create_task.save()

    return True