def get_RandomForest_model(model_name,
                           embedding_dim=embedding.embedding_dim,
                           train_data=None,
                           load_model=True,
                           train_model=False,
                           save_model=True,
                           test_data=None,
                           build_params=None,
                           train_params=None,
                           **kwargs):
    """
    Creates an instance of RF_classi and optionally loads, trains, tests
    and saves it.

    Parameters:
    :param model_name: (str) the name of the model to create, or the name of
        the model to load.
    :param embedding_dim: (int) dimension of the embedding space
    :param train_data: (np.ndarray) the training data in a single matrix
        (like the one produced by the
        embedding.pipeline.build_embedding_matrix method). The last column
        is used as the target, all other columns as the input. Only read
        when train_model is true and generator_mode is false.
    :param load_model: (bool) whether to load the model from file.
    :param train_model: (bool) whether to train the model.
    :param save_model: (bool) whether to save the model
    :param test_data: (np.ndarray) if not None, the model will be tested
        against this test data (same layout as train_data).
    :param build_params: (dict) keyword arguments forwarded to the model's
        build method. None is treated as an empty dictionary.
    :param train_params: (dict) keyword arguments forwarded to the model's
        train method. None is treated as an empty dictionary.
    :param kwargs: additional arguments
        Arguments accepted:
        - :arg vocabulary: (str) vocabulary in use; standard_vocab_name is
            used when it is missing or empty
        - :arg load_embedding: (bool) whether to load the GloVe embedding
            from file (default True)
        - :arg embedding_location: (str) - only used if the above parameter
            is true - file that stores the embedding matrix
            (default "glove_emb.npz")
        - :arg generator_mode: (bool) forwarded to the train method; when
            true, train_data is not sliced and x/y are passed as None
            (default False)
        - :arg max_len: (int) forwarded to the train method (default 100)
    :return: an instance of RF_classi class
    """
    # The defaults are None; unpacking None with ** raises a TypeError.
    build_params = {} if build_params is None else build_params
    train_params = {} if train_params is None else train_params

    vocabulary = kwargs.get("vocabulary") or standard_vocab_name
    generator_mode = kwargs.get("generator_mode", False)
    max_len = kwargs.get("max_len", 100)

    # Bound up-front so the training branch can always reference it, even
    # when the caller disabled the embedding loading.
    glove_embedding = None
    if kwargs.get("load_embedding", True):
        glove_embedding = get_glove_embedding(
            vocabulary_file=vocabulary,
            load_from_file=True,
            # no need to reload the stanford embedding when we already load
            # the embedding matrix from file
            load_Stanford=False,
            file_name=kwargs.get("embedding_location", "glove_emb.npz"),
            train=False,
            save=False)

    # -------------------
    # Building the model
    ourRF = RF_classi(embedding_dim, model_name)
    ourRF.build(**build_params)
    if load_model:
        ourRF.load()

    # ----------------
    # Training, testing and saving
    if train_model:
        print("train_model", train_model)
        x_train, y_train = None, None
        if not generator_mode:
            print(generator_mode)
            x_train = train_data[:, 0:-1]
            y_train = train_data[:, -1]
        generator_params = {
            "embedding": glove_embedding,
            "input_files": [
                replaced_train_negative_location,
                replaced_train_positive_location
            ],
            "input_entries": full_dimension,
            "max_len": max_len
        }
        ourRF.train(x_train, y_train,
                    generator_mode=generator_mode,
                    **generator_params,
                    **train_params)
        # NOTE(review): the collapsed source does not show whether this save
        # was nested under train_model; the final save below makes the end
        # result the same either way.
        if save_model:
            ourRF.save()

    if test_data is not None:
        x_test = test_data[:, 0:-1]
        y_test = test_data[:, -1]
        ourRF.test(x_test, y_test)

    if save_model:
        ourRF.save()
    return ourRF
def get_convolutional_model(model_name,
                            embedding_dim=embedding.embedding_dim,
                            train_data=None,
                            load_model=False,
                            train_model=False,
                            save_model=False,
                            test_data=None,
                            build_params=None,
                            train_params=None,
                            **kwargs):
    """
    Creates an instance of convolutional_NN and optionally loads, trains,
    saves and tests it.

    Parameters:
    :param model_name: (str) the name of the model to create, or the name of
        the model to load.
    :param embedding_dim: (int) dimension of the embedding space
    :param train_data: (np.ndarray) the training data in a single matrix
        (like the one produced by the
        embedding.pipeline.build_embedding_matrix method). The last column
        is used as the target, all other columns as the input. Only read
        when train_model is true and generator_mode is false.
    :param load_model: (bool) whether to load the model from file.
    :param train_model: (bool) whether to train the model.
    :param save_model: (bool) whether to save the model after training
    :param test_data: (np.ndarray) if not None, the model will be tested
        against this test data (same layout as train_data).
    :param build_params: (dict) keyword arguments forwarded to the model's
        build method. None is treated as an empty dictionary.
    :param train_params: (dict) keyword arguments forwarded to the model's
        train method. None is treated as an empty dictionary.
    :param kwargs: additional arguments
        Arguments accepted:
        - :arg load_embedding: (bool) whether to load an embedding matrix
            into the classifier (if false or missing, None is passed as the
            embedding matrix and the classifier is expected to learn the
            embedding from scratch)
        - :arg embedding_location: (str) - only used if the above parameter
            is true - path to the file that stores the embedding matrix
            (default "glove_emb.npz")
        - :arg vocabulary: (str) vocabulary in use; standard_vocab_name is
            used when it is missing or empty
        - :arg generator_mode: (bool) forwarded to the train method; when
            true, train_data is not sliced and x/y are passed as None
            (default False)
    :return: an instance of convolutional_NN class
    """
    # The defaults are None; unpacking None with ** raises a TypeError.
    build_params = {} if build_params is None else build_params
    train_params = {} if train_params is None else train_params

    vocabulary = kwargs.get("vocabulary") or standard_vocab_name
    vocab_dim = get_vocab_dimension(vocabulary)
    generator_mode = kwargs.get("generator_mode", False)

    # --------------------
    # Opening pre-trained embedding matrix
    # Stays None when no embedding is loaded, so that the constructor call
    # below never hits an unbound local.
    embedding_matrix = None
    if kwargs.get("load_embedding"):
        glove_embedding = get_glove_embedding(
            vocabulary_file=vocabulary,
            load_from_file=True,
            # no need to reload the stanford embedding when we already load
            # the embedding matrix from file
            load_Stanford=False,
            file_name=kwargs.get("embedding_location", "glove_emb.npz"),
            train=False,
            save=False)
        embedding_matrix = glove_embedding.embedding_matrix

    # -------------------
    # Building the model
    convolutional = convolutional_NN(embedding_dimension=embedding_dim,
                                     vocabulary_dimension=vocab_dim,
                                     name=model_name,
                                     embedding_matrix=embedding_matrix)
    convolutional.build(**build_params)
    if load_model:
        convolutional.load()

    # ----------------
    # Training, testing and saving
    if train_model:
        x_train, y_train = None, None
        if not generator_mode:
            x_train = train_data[:, 0:-1]
            y_train = train_data[:, -1]
        convolutional.train(x_train, y_train,
                            generator_mode=generator_mode,
                            **train_params)
        # NOTE(review): nesting reconstructed from a collapsed source; the
        # save is assumed to belong to the training branch - confirm.
        if save_model:
            convolutional.save()

    if test_data is not None:
        idx2word = load_inverse_vocab(vocabulary)
        x_test = test_data[:, 0:-1]
        y_test = test_data[:, -1]
        convolutional.test(x_test, y_test, idx2word=idx2word)
    return convolutional
def get_ET_model(model_name,
                 train_data=None,
                 load_model=False,
                 train_model=False,
                 save_model=False,
                 test_data=None,
                 build_params=None,
                 train_params=None,
                 **kwargs):
    """
    Creates an embedding-transformer model and optionally loads, trains,
    saves and tests it.

    An etransformer_NN is built when exactly one embedding is requested,
    a metransformer_NN otherwise.

    Parameters:
    :param model_name: (str) the name of the model to create, or the name of
        the model to load.
    :param train_data: (np.ndarray) the training data in a single matrix
        (like the one produced by the
        embedding.pipeline.build_embedding_matrix method). The last column
        is used as the target, all other columns as the input. Only read
        when train_model is true.
    :param load_model: (bool) whether to load the model from file.
    :param train_model: (bool) whether to train the model.
    :param save_model: (bool) whether to save the model after training
    :param test_data: (np.ndarray) if not None, the model will be tested
        against this test data (same layout as train_data).
    :param build_params: (dict) keyword arguments forwarded to the model's
        build method. None is treated as an empty dictionary.
    :param train_params: (dict) keyword arguments forwarded to the model's
        train method. None is treated as an empty dictionary.
    :param kwargs: additional arguments
        Arguments accepted (all three are required):
        - :arg number_of_embeddings: (int) how many embeddings to load
        - :arg vocabularies: (sequence) vocabulary to use for each
            embedding, indexed from 0 to number_of_embeddings - 1
        - :arg embedding_locations: (sequence) file storing each embedding
            matrix, indexed like vocabularies
    :return: an instance of etransformer_NN or metransformer_NN
    :raises AssertionError: if any of the three required kwargs is missing
    """
    number_of_embeddings = kwargs.get("number_of_embeddings")
    vocabularies = kwargs.get("vocabularies")
    embedding_locations = kwargs.get("embedding_locations")
    # Raised explicitly instead of through an `assert` statement: asserts are
    # stripped under `python -O`, which would silently skip this usage check.
    # The exception type and message are unchanged for existing callers.
    if (vocabularies is None
            or number_of_embeddings is None
            or embedding_locations is None):
        raise AssertionError(
            "Usage error. To use the met network you need to specify the embeddings and vocabularies to use."
        )

    # The defaults are None; unpacking None with ** raises a TypeError.
    build_params = {} if build_params is None else build_params
    train_params = {} if train_params is None else train_params

    # --------------------
    # Opening pre-trained embedding matrix
    # Note (from the original author): the "get glove embedding matrix" can
    # load multiple embeddings.
    embeddings = [
        get_glove_embedding(
            vocabulary_file=vocabularies[i],
            load_from_file=True,
            # no need to reload the stanford embedding when we already load
            # the embedding matrix from file
            load_Stanford=False,
            file_name=embedding_locations[i],
            train=False,
            save=False).embedding_matrix
        for i in range(number_of_embeddings)
    ]

    # -------------------
    # Building the model
    single_embedding = number_of_embeddings == 1
    if single_embedding:
        my_transformer = etransformer_NN(
            embedding_dimension=embeddings[0].shape[1],
            vocabulary_dimension=get_vocab_dimension(vocabularies[0]),
            embedding_matrices=embeddings[0],
            number_of_embeddings=number_of_embeddings,
            name=model_name)
    else:
        my_transformer = metransformer_NN(
            # NOTE(review): -1 looks like a placeholder because there is no
            # single dimension for several matrices - confirm with the class.
            embedding_dimension=-1,
            embedding_matrices=embeddings,
            number_of_embeddings=number_of_embeddings,
            name=model_name)
    my_transformer.build(**build_params)
    if load_model:
        my_transformer.load()

    # ----------------
    # Training, testing and saving
    if train_model:
        x_train = train_data[:, 0:-1]
        y_train = train_data[:, -1]
        my_transformer.train(x_train, y_train,
                             generator_mode=False,
                             **train_params)
        # NOTE(review): nesting reconstructed from a collapsed source; the
        # save is assumed to belong to the training branch - confirm.
        if save_model:
            my_transformer.save()

    if test_data is not None:
        # The inverse vocabulary is only loaded in the single-embedding case.
        idx2word = None
        if single_embedding:
            idx2word = load_inverse_vocab(vocabularies[0])
        x_test = test_data[:, 0:-1]
        y_test = test_data[:, -1]
        my_transformer.test(x_test, y_test, idx2word=idx2word)
    return my_transformer
def get_recurrent_model(model_name,
                        embedding_dim=embedding.embedding_dim,
                        train_data=None,
                        load_model=False,
                        train_model=False,
                        save_model=False,
                        test_data=None,
                        build_params=None,
                        train_params=None,
                        **kwargs):
    """
    Creates an instance of recurrent_NN (or attention_NN when
    build_params["use_attention"] is true) and optionally loads, trains,
    saves, tests it and plots its attention.

    Parameters:
    :param model_name: (str) the name of the model to create, or the name of
        the model to load.
    :param embedding_dim: (int) dimension of the embedding space
    :param train_data: (np.ndarray) the training data in a single matrix
        (like the one produced by the
        embedding.pipeline.build_embedding_matrix method). The last column
        is used as the target, all other columns as the input. Only read
        when train_model is true and generator_mode is false.
    :param load_model: (bool) whether to load the model from file.
    :param train_model: (bool) whether to train the model.
    :param save_model: (bool) whether to save the model after training
    :param test_data: (np.ndarray) if not None, the model will be tested
        against this test data (same layout as train_data).
    :param build_params: (dict) dictionary of parameters to pass to build
        the model. None is treated as an empty dictionary.
        ```
        >>> Example : build_params = {"activation":'relu', \
                                      "loss":"binary_crossentropy",\
                                      "metrics":None,\
                                      "cell_type":"LSTM",\
                                      "num_layers":3,\
                                      "hidden_size":64,\
                                      "train_embedding":False,\
                                      "use_attention":False, \
                                      "optimizer":"rmsprop"}
        ```
    :param train_params: (dict) dictionary of parameters to pass to train
        the model. None is treated as an empty dictionary.
        >>> Example : {"epochs":10, \
                        "batch_size":32, \
                        "validation_split":0.2}
    :param kwargs: additional arguments
        Arguments accepted:
        - :arg load_embedding: (bool) whether to load an embedding matrix
            into the classifier (if false or missing, None is passed as the
            embedding matrix and the classifier is expected to learn the
            embedding from scratch)
        - :arg embedding_location: (str) - only used if the above parameter
            is true - path to the file that stores the embedding matrix
            (default "glove_emb.npz")
        - :arg vocabulary: (str) vocabulary in use; standard_vocab_name is
            used when it is missing or empty
        - :arg generator_mode: (bool) forwarded to the train method; when
            true, train_data is not sliced and x/y are passed as None
            (default True)
        - :arg max_len: (int) forwarded to the train method (default 100)
        - :arg visualize_attention: (bool) whether to plot the attention on
            two sample sentences; only honoured for the attention model
            (defaults to the value of train_model)
    :return: an instance of recurrent_NN or attention_NN
    """
    # The defaults are None; `.get` and `**` unpacking both fail on None.
    build_params = {} if build_params is None else build_params
    train_params = {} if train_params is None else train_params

    vocabulary = kwargs.get("vocabulary") or standard_vocab_name
    vocab_dim = get_vocab_dimension(vocabulary)
    generator_mode = kwargs.get("generator_mode", True)
    max_len = kwargs.get("max_len", 100)

    # --------------------
    # Opening pre-trained embedding matrix
    # Both names are bound up-front: they are read further down (model
    # constructor, generator parameters, visualization) regardless of
    # whether an embedding was actually loaded.
    glove_embedding = None
    embedding_matrix = None
    if kwargs.get("load_embedding"):
        glove_embedding = get_glove_embedding(
            vocabulary_file=vocabulary,
            load_from_file=True,
            # no need to reload the stanford embedding when we already load
            # the embedding matrix from file
            load_Stanford=False,
            file_name=kwargs.get("embedding_location", "glove_emb.npz"),
            train=False,
            save=False)
        embedding_matrix = glove_embedding.embedding_matrix

    # -------------------
    # Building the model
    use_attention = build_params.get("use_attention")
    recurrent_fun = attention_NN if use_attention else recurrent_NN
    recurrent = recurrent_fun(embedding_dimension=embedding_dim,
                              vocabulary_dimension=vocab_dim,
                              name=model_name,
                              embedding_matrix=embedding_matrix)
    recurrent.build(**build_params)
    if load_model:
        recurrent.load()

    # ----------------
    # Training, testing and saving
    if train_model:
        x_train, y_train = None, None
        print("generator mode ", generator_mode)
        if not generator_mode:
            x_train = train_data[:, 0:-1]
            y_train = train_data[:, -1]
        generator_params = {
            "embedding": glove_embedding,
            "input_files": [train_negative_location, train_positive_location],
            "input_entries": full_dimension,
            "max_len": max_len
        }
        recurrent.train(x_train, y_train,
                        generator_mode=generator_mode,
                        **generator_params,
                        **train_params)
        # NOTE(review): nesting reconstructed from a collapsed source; the
        # save is assumed to belong to the training branch - confirm.
        if save_model:
            recurrent.save()

    if test_data is not None:
        idx2word = load_inverse_vocab(vocabulary)
        x_test = test_data[:, 0:-1]
        y_test = test_data[:, -1]
        recurrent.test(x_test, y_test, idx2word=idx2word)

    # ---------------
    # Visualization
    visualize_attention = kwargs.get("visualize_attention", train_model)
    # Note: visualization can only be used with the attention model
    if visualize_attention and use_attention:
        sentence_pos = "I'm loving this project, let's keep on working guys!"
        sentence_neg = "I hate bugs, but not as much as I hate cooking."
        # 1. get the vectorised representation of the sentence
        sentence_pos_vec = no_embeddings(sentence_pos,
                                         embedding=glove_embedding)
        sentence_neg_vec = no_embeddings(sentence_neg,
                                         embedding=glove_embedding)
        # 2. get the attention plot
        recurrent.visualize_attention(sentence_pos, sentence_pos_vec)
        recurrent.visualize_attention(sentence_neg, sentence_neg_vec)
    return recurrent