def extract_ngrams(user: UserModel, sentences: List[SentenceModel],
                   size: int, language: str) -> DataFrame:
    cipher = load_cipher(user)
    pre_processed_sentences = [
        cipher.decrypt(sentence.pre_processed_content.encode()).decode()
        for sentence in sentences
    ]
    ngrams = generate_ngrams(pre_processed_sentences, size, language)
    return ngrams
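
# NOTE (illustrative sketch, not project code): load_cipher is a project-internal
# helper that is not shown in this listing. Its call pattern (encrypt/decrypt over
# bytes, with .encode()/.decode() at the boundaries) matches a Fernet-style
# symmetric cipher, so a plausible round-trip looks like the sketch below.
# The per-user key handling is an assumption.
from cryptography.fernet import Fernet


def _cipher_roundtrip_example() -> None:
    key = Fernet.generate_key()                 # assumed: one key per user
    cipher = Fernet(key)                        # assumed shape of load_cipher(user)
    stored = cipher.encrypt("uma sentença".encode()).decode()   # str persisted in MongoDB
    recovered = cipher.decrypt(stored.encode()).decode()        # str fed to the NLP helpers
    assert recovered == "uma sentença"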
def extract_ner(user: UserModel, sentences: List[SentenceModel],
                language: str) -> DataFrame:
    cipher = load_cipher(user)
    pre_processed_sentences = [
        cipher.decrypt(sentence.pre_processed_content.encode()).decode()
        for sentence in sentences
    ]
    ner_sentences = extract_ner_from_sentences(pre_processed_sentences, language)
    return ner_sentences
def get_wordcloud(self) -> WordcloudModel:
    wcs = get_wordclouds_from_datafile(self.datafile)
    wc: WordcloudModel = wcs.first()
    if wc:
        # The stored image is encrypted with the owner's key; decrypt before returning.
        private_cipher = load_cipher(self.user)
        wc.base64_image = private_cipher.decrypt(
            wc.base64_image.encode()).decode()
        return wc
    else:
        raise FileNotFoundError("Ooops, nada por aqui ainda....")
def create_base64_wordcloud(user: UserModel, sentences: List[SentenceModel],
                            language: str) -> bytes:
    cipher = load_cipher(user)
    pre_processed_sentences = [
        cipher.decrypt(sentence.pre_processed_content.encode()).decode()
        for sentence in sentences
    ]
    wordcloud = generate_wordcloud(pre_processed_sentences, language)
    # Render the wordcloud to an in-memory JPEG and return it base64-encoded.
    image = wordcloud.to_image()
    buffer = BytesIO()
    image.save(buffer, "JPEG")
    image_base64 = base64.b64encode(buffer.getvalue())
    return image_base64
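
# Illustrative sketch (not project code): how a caller could turn the value
# returned by create_base64_wordcloud back into a Pillow image, e.g. for a
# quick local check. Only the standard library and Pillow are assumed.
import base64
from io import BytesIO

from PIL import Image


def _decode_wordcloud_example(image_base64: bytes) -> Image.Image:
    raw = base64.b64decode(image_base64)        # undo the base64 step
    image = Image.open(BytesIO(raw))            # JPEG bytes -> PIL image
    return image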
def list_sentences_from_datafile(
        datafile: DataFileModel, skip: int,
        limit: int) -> Tuple[List[SentenceModel], int]:
    sentences = SentenceModel.objects(datafile=datafile)
    total = sentences.count()
    sentences_pag = sentences.skip(skip).limit(limit)
    # Decrypt both the raw and the pre-processed content for the requested page.
    cipher = load_cipher(datafile.owner)
    for s in sentences_pag:
        s.content = cipher.decrypt(s.content.encode()).decode()
        s.pre_processed_content = cipher.decrypt(
            s.pre_processed_content.encode()).decode()
    return sentences_pag, total
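
# Illustrative usage sketch (not project code): page-based pagination on top of
# the skip/limit interface above. The page/page_size names are hypothetical.
def _list_page_example(datafile: DataFileModel, page: int, page_size: int = 25) -> None:
    sentences, total = list_sentences_from_datafile(
        datafile, skip=(page - 1) * page_size, limit=page_size)
    print(f"showing page {page} of {total} sentences in total")
    for sentence in sentences:
        print(sentence.index, sentence.content[:40])    # already decrypted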
def process_task(save_sentence_task: SaveSentenceTaskModel) -> bool:
    # Pre-process the sentence
    cipher = load_cipher(save_sentence_task.owner)
    try:
        preprocessed_sentence = preprocess_sentence(
            cipher.decrypt(save_sentence_task.content.encode()).decode(),
            save_sentence_task.datafile.language)
    except Exception:
        save_sentence_task.status = "error"
        save_sentence_task.error = "Erro ao preprocessar a sentenca"
        save_sentence_task.save()
        logger.error("Erro ao preprocessar a sentenca",
                     exc_info=True,
                     extra={"received_args": save_sentence_task.to_mongo()})
        return False

    # Save the sentence; duplicates (NotUniqueError) are silently ignored.
    try:
        schema = SentenceSchema()
        sentence: SentenceModel = schema.load({
            "datafile": save_sentence_task.datafile,
            "index": save_sentence_task.index,
            "content": save_sentence_task.content,
            "pre_processed_content": cipher.encrypt(
                preprocessed_sentence.encode()).decode()
        })
        sentence.save()
    except NotUniqueError:
        pass
    except Exception:
        save_sentence_task.status = "error"
        save_sentence_task.error = "Erro ao salvar a sentença"
        save_sentence_task.save()
        logger.error("Erro ao salvar a sentença",
                     exc_info=True,
                     extra={"received_args": save_sentence_task.to_mongo()})
        return False

    # Update the task status
    save_sentence_task.progress = 1
    save_sentence_task.status = "success"
    save_sentence_task.save()
    return True
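
# Illustrative sketch (not project code): a worker loop that consumes the queue
# fed by import_sentences_from_df below and hands each task to process_task.
# The project wraps RabbitMQ in its own RabbitProducer class; this sketch talks
# to RabbitMQ directly through pika, the connection parameters are assumptions,
# and the queue is assumed to have been declared by the producer side.
import json

import pika


def _sentence_worker_example() -> None:
    connection = pika.BlockingConnection(pika.ConnectionParameters(host="localhost"))
    channel = connection.channel()

    def callback(ch, method, properties, body):
        payload = json.loads(body)                                    # {"task": "<task id>"}
        task = SaveSentenceTaskModel.objects.get(id=payload["task"])  # assumed MongoEngine lookup
        process_task(task)
        ch.basic_ack(delivery_tag=method.delivery_tag)

    channel.basic_consume(queue="NLEaser.sentence_import", on_message_callback=callback)
    channel.start_consuming()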
def import_sentences_from_df(
        df: DataFrame, datafile: DataFileModel,
        datafile_import_task: DataFileUploadTaskModel) -> None:
    logger.info("Importando sentenças",
                extra={"received_args": {"datafile": datafile.id}})
    user = datafile.owner
    cipher = load_cipher(user)

    datafile_import_task.status = "queued"
    datafile_import_task.save()

    text_column = datafile.text_column
    try:
        schema = SentenceSaveTaskSchema()
        producer = RabbitProducer("NLEaser.sentence_import")
        # One SaveSentenceTask is persisted per row, encrypted with the owner's
        # key, and its id is published to the sentence_import queue.
        for index, row in df.iterrows():
            sentence_import_task: SaveSentenceTaskModel = schema.load({
                "owner": str(datafile.owner.id),
                "datafile": str(datafile.id),
                "parent": str(datafile_import_task.id),
                "total": 1,
                "content": cipher.encrypt(str(row[text_column]).encode()).decode(),
                "index": index
            })
            sentence_import_task.save()
            producer.send_message(
                json.dumps({"task": str(sentence_import_task.id)}))
    except Exception:
        datafile_import_task.status = "error"
        datafile_import_task.error = "Erro desconhecido ao importar as sentenças"
        datafile_import_task.save()
        logger.error("Erro ao importar sentenças do dataframe",
                     exc_info=True,
                     extra={
                         "received_args": {
                             "datafile": datafile.id,
                             "upload": datafile_import_task.id,
                             "text_column": text_column
                         }
                     })
def process_task(wordcloud_create_task: WordcloudCreateTaskModel) -> bool:
    # Fetch the sentences
    wordcloud_create_task.status = "in_progress"
    wordcloud_create_task.total = 3
    wordcloud_create_task.progress = 1
    wordcloud_create_task.save()
    try:
        sentences: List[SentenceModel] = SentenceModel.objects(
            datafile=wordcloud_create_task.datafile).all()
    except Exception:
        wordcloud_create_task.status = "error"
        wordcloud_create_task.error = "Erro ao importar as sentenças desse arquivo"
        wordcloud_create_task.save()
        logger.error(wordcloud_create_task.error,
                     exc_info=True,
                     extra={"received_args": wordcloud_create_task.to_mongo()})
        return False

    # Generate the wordcloud as base64
    try:
        base64_image = create_base64_wordcloud(
            wordcloud_create_task.owner, sentences,
            wordcloud_create_task.datafile.language)
        wordcloud_create_task.progress += 1
        wordcloud_create_task.save()
    except Exception:
        wordcloud_create_task.status = "error"
        wordcloud_create_task.error = "Erro ao gerar o wordcloud em base64"
        wordcloud_create_task.save()
        logger.error(wordcloud_create_task.error,
                     exc_info=True,
                     extra={"received_args": wordcloud_create_task.to_mongo()})
        return False

    # Save the wordcloud, encrypted with the owner's key
    try:
        cipher = load_cipher(wordcloud_create_task.owner)
        schema = WordcloudSchema()
        model: WordcloudModel = schema.load({
            "datafile": wordcloud_create_task.datafile,
            "base64_image": cipher.encrypt(base64_image).decode()
        })
        model.save()
        wordcloud_create_task.progress += 1
        wordcloud_create_task.save()
    except Exception:
        wordcloud_create_task.status = "error"
        wordcloud_create_task.error = "Erro ao salvar o WordCloud"
        wordcloud_create_task.save()
        logger.error(wordcloud_create_task.error,
                     exc_info=True,
                     extra={"received_args": wordcloud_create_task.to_mongo()})
        return False

    wordcloud_create_task.status = "success"
    wordcloud_create_task.save()
    return True