from melusine.prepare_email.mail_segmenting import split_message_to_sentences
from melusine.utils.multiprocessing import apply_by_multiprocessing
from melusine.nlp_tools.tokenizer import Tokenizer
from melusine.config.config import ConfigJsonReader

conf_reader = ConfigJsonReader()


class Streamer:
    """Class to transform pd.Series into stream.

    Used to prepare the data for the training of the phraser and embeddings.

    Attributes
    ----------
    column_ : str,
        Input text column(s) to consider for the streamer.

    stream : MailIterator object,
        Stream of all the tokens of the pd.Series.

    Examples
    --------
    >>> streamer = Streamer()
    >>> streamer.to_stream(X)  # will build the stream attribute
    >>> tokens_stream = streamer.stream
    >>> print(tokens_stream)

    """

    def __init__(self, stop_removal=False, column="clean_body", n_jobs=1):
        """Store the name of the text column to stream.

        Parameters
        ----------
        stop_removal : bool, default False
            Accepted for interface compatibility; not used by the visible
            part of this constructor.
        column : str, default "clean_body"
            Input text column(s) to consider; stored as ``column_``.
        n_jobs : int, default 1
            Accepted for interface compatibility; not used by the visible
            part of this constructor.
        """
        # NOTE(review): ``stop_removal`` and ``n_jobs`` are never stored here;
        # confirm whether the remainder of the constructor was lost.
        self.column_ = column
from sklearn.base import BaseEstimator, ClassifierMixin from tensorflow.keras.utils import to_categorical from tensorflow.keras.models import model_from_json from tensorflow.keras.preprocessing.sequence import pad_sequences from tensorflow.keras.optimizers import Adam from tensorflow.keras.callbacks import TensorBoard from transformers import CamembertTokenizer, XLMTokenizer from transformers import TFCamembertModel, TFFlaubertModel from melusine.config.config import ConfigJsonReader from melusine.nlp_tools.tokenizer import Tokenizer from melusine.models.attention_model import PositionalEncoding from melusine.models.attention_model import TransformerEncoderLayer from melusine.models.attention_model import MultiHeadAttention conf_reader = ConfigJsonReader() config = conf_reader.get_config_file() tensorboard_callback_parameters = config["tensorboard_callback"] class NeuralModel(BaseEstimator, ClassifierMixin): """Generic class for neural models. It is compatible with scikit-learn API (i.e. contains fit, transform methods). Parameters ---------- neural_architecture_function : function, Function which returns a Model instance from Keras. Implemented model functions are: cnn_model, rnn_model, transformers_model, bert_model