def _save_checkpoint(self):
    """Create the callback list used during training.

    Saves the model to ``<MODEL_FILENAME>LARGE.h5`` whenever the
    monitored training loss improves (save_best_only with mode='min').

    :return: list of Keras callbacks containing a single ModelCheckpoint
    """
    # NOTE(review): the original concatenated a redundant empty string
    # between the two parts; removed without changing the resulting path.
    filepath = env().MODEL_FILENAME + "LARGE.h5"
    checkpoint = ModelCheckpoint(filepath,
                                 monitor='loss',
                                 verbose=1,
                                 save_best_only=True,
                                 mode='min')
    return [checkpoint]
def load_vocabularies():
    """Load the character vocabularies from the pickle file.

    :return: char_to_idx, idx_to_char, max_seq_len
    """
    with open(env().path_to_vocabularies, "rb") as f_pickle:
        # Objects were dumped in this exact order by save_vocabularies().
        char_to_idx, idx_to_char, max_seq_len = (
            pickle.load(f_pickle) for _ in range(3))
    return char_to_idx, idx_to_char, max_seq_len
# Esempio n. 3
# 0
def main():
    """End-to-end pipeline: build the dataset, persist it, reload it from
    disk, then train the language classifier.
    """
    dr = DataReader(env().data_dir)
    data, labels = dr.create_dataset()
    dr.save_data_csv()
    # Deliberate round-trip through the CSV on disk; overwrites the
    # in-memory copies just created above.
    data, labels = dr.load_dataset()
    dg = DataGenerator(data, labels)
    data, labels = dg.generate_data()

    X_train, X_test, y_train, y_test = dg.split_train_test(env().DIM_TEST)

    dg.save_data(X_train, X_test, y_train, y_test)

    # Reload the arrays (plus word vocabularies) from the pickle file just
    # written by save_data().
    X_train, X_test, y_train, y_test, word_to_idx, idx_to_word, max_words_length = load_data(
    )

    print(len(X_train))

    langclassifier = LanguageClassifier(X_train, y_train, X_test, y_test)
    langclassifier.train_model()
# Esempio n. 4
# 0
def load_data():
    """Load the train/test arrays and word vocabularies from the pickle file.

    :return: X_tr, X_te, y_tr, y_te, word_to_idx, idx_to_word, max_words
    """
    with open(env().path_to_arrays, "rb") as f_pickle:
        # Seven objects, read back in the same order they were dumped.
        loaded = [pickle.load(f_pickle) for _ in range(7)]
    X_tr, X_te, y_tr, y_te, word_to_idx, idx_to_word, max_words = loaded
    return X_tr, X_te, y_tr, y_te, word_to_idx, idx_to_word, max_words
 def _build_baseline(self):
     """Assemble the baseline network: embedding -> stacked LSTMs -> softmax.

     input_length is fixed by the width of the padded training sequences;
     the output layer has one unit per class column in y_train.
     """
     layers = [
         Embedding(self.vocab_size,
                   env().EMBEDDING_DIM,
                   input_length=self.x_train.shape[1]),
         LSTM(56, return_sequences=True, dropout=0.1,
              recurrent_dropout=0.1),
         LSTM(56, dropout=0.1, recurrent_dropout=0.1),
         Dense(self.y_train.shape[1], activation='softmax'),
     ]
     # Append in order onto the Sequential model.
     for layer in layers:
         self.model.add(layer)
def load_data():
    """Load the training and test arrays from file.

    :return: X_training, X_test, y_training, y_test
    """
    with open(env().path_to_arrays, "rb") as f_pickle:
        # Order matches the dump order: X_train, X_test, y_train, y_test.
        arrays = tuple(pickle.load(f_pickle) for _ in range(4))
    return arrays
 def save_vocabularies(self):
     """Pickle the character vocabularies and max sequence length to disk."""
     # Dump order must match the read order in load_vocabularies().
     vocab_objects = (self.char_to_index, self.index_to_char,
                      self.max_seq_len)
     with open(env().path_to_vocabularies, 'wb') as f_pickle:
         for obj in vocab_objects:
             pickle.dump(obj, f_pickle, protocol=pickle.HIGHEST_PROTOCOL)
# Esempio n. 8
# 0
 def save_data_csv(self):
     """Write the dataset to a tab-separated CSV file, one
     ``<sample>\\t<label>`` row per entry.

     Prints a warning when the dataset is empty; reports any I/O error
     instead of raising (best-effort save, matching the original contract).
     """
     if not self.data:
         print("Empty dataset!\n")
         return
     try:
         with open(env().dataset_csv_path, mode="w",
                   encoding="utf-8") as f_csv:
             # zip keeps data and labels aligned; the with-block closes the
             # file, so the original's explicit close() call was redundant.
             for sample, label in zip(self.data, self.labels):
                 f_csv.write("{}\t{}\n".format(sample, label))
     except Exception as ex:
         # Best-effort: report and continue. (Fixed typo: "save" -> "saved".)
         print("Error: dataset not saved - ", ex)
# Esempio n. 9
# 0
 def load_dataset(self):
     """Read the tab-separated dataset CSV back into memory.

     Repopulates ``self.data`` and ``self.label`` from disk.

     :return: (data, labels) as parallel lists of strings
     """
     data_list, labels_list = [], []
     with open(env().dataset_csv_path, mode="rt",
               encoding="utf-8") as f_csv:
         lines = f_csv.read().split('\n')
         # save_data_csv() ends every row with '\n', so the final split
         # element is empty and is skipped.
         for line in lines[:-1]:
             x, y = line.split('\t')
             data_list.append(x)
             labels_list.append(y)
     # Removed the redundant f_csv.close(); the with-block closes the file.
     # Restore the fields of the DataReader object.
     # NOTE(review): this writes ``self.label`` while save_data_csv() reads
     # ``self.labels`` — looks like a naming inconsistency; confirm with
     # callers before renaming.
     self.data = data_list
     self.label = labels_list
     return self.data, self.label
# Esempio n. 10
# 0
 def save_data(self, X_tr, X_te, y_tr, y_te):
     """Pickle the train/test arrays plus the word vocabularies to disk.

     Dump order (must match load_data): X_tr, X_te, y_tr, y_te,
     word_to_index, index_to_word, max_seq_len.
     """
     payload = (X_tr, X_te, y_tr, y_te, self.word_to_index,
                self.index_to_word, self.max_seq_len)
     with open(env().path_to_arrays, 'wb') as f_pickle:
         for obj in payload:
             pickle.dump(obj, f_pickle, protocol=pickle.HIGHEST_PROTOCOL)
    def __init__(self, x_train, y_train, x_test, y_test, vocab_size=None):
        """Store the data splits and read the hyperparameters from env().

        :param x_train: training inputs
        :param y_train: training targets
        :param x_test: test inputs
        :param y_test: test targets
        :param vocab_size: vocabulary size for the embedding layer (optional)
        """
        # Data splits.
        self.x_train, self.y_train = x_train, y_train
        self.x_test, self.y_test = x_test, y_test
        self.vocab_size = vocab_size

        # Hyperparameters, read from env() at construction time.
        self.batch_size = env().BATCH_SIZE
        self.embed_dim = env().EMBEDDING_DIM
        self.epochs = env().EPOCHS
        self.optimizer = env().OPTIMIZER
        self.units = env().UNITS
        self.model_filename = env().MODEL_FILENAME

        # Layers are added later by the _build_* methods.
        self.model = Sequential()
 def save_data(self, X_tr, X_te, y_tr, y_te):
     """Pickle the train/test arrays to the configured path.

     Dump order (must match the corresponding load): X_tr, X_te, y_tr, y_te.
     """
     with open(env().path_to_arrays, 'wb') as f_pickle:
         pickle.dump(X_tr, f_pickle, protocol=pickle.HIGHEST_PROTOCOL)
         pickle.dump(X_te, f_pickle, protocol=pickle.HIGHEST_PROTOCOL)
         pickle.dump(y_tr, f_pickle, protocol=pickle.HIGHEST_PROTOCOL)
         pickle.dump(y_te, f_pickle, protocol=pickle.HIGHEST_PROTOCOL)