def __init__(self, modelpath, input_max_len=INPUT_MAX_LEN, output_max_len=OUTPUT_MAX_LEN):
    """Restore a trained attention model and its vocabularies from *modelpath*.

    Hyperparameters are recovered from the underscore-separated fields of
    the weight file's basename:
    ``<name>_?_?_?_<embedding>_<hidden>_<layers>_<dropout>_<lr><5-char ext>``
    — TODO confirm the exact filename layout against the training code.
    """
    self.modelpath = modelpath
    parts = os.path.basename(modelpath).split('_')
    self.model_name = parts[0]
    self.x_max_len = input_max_len
    self.y_max_len = output_max_len
    # Index→word lookup tables persisted alongside the weights.
    self.x_ix_to_word = load_index(f'{self.model_name}_input_index.csv')
    self.y_ix_to_word = load_index(f'{self.model_name}_output_index.csv')
    self.embedding_dim = int(parts[4])
    self.hidden_dim = int(parts[5])
    self.layer_num = int(parts[6])
    self.dropout = float(parts[7])
    # Last field is the learning rate followed by a 5-character file
    # extension (presumably '.hdf5'), which is sliced off here.
    self.learning_rate = float(parts[8][0:-5])
    self.model = AttentionModel(self.model_name, self.x_max_len,
                                len(self.x_ix_to_word), self.y_max_len,
                                len(self.y_ix_to_word), self.hidden_dim,
                                self.layer_num, self.learning_rate,
                                self.dropout, self.embedding_dim)
    self.model.load_weights(self.modelpath)
class CardinalLSTMTransformer(TransformerMixin, BaseEstimator):
    """Scikit-learn-style transformer that fills the 'after' column for rows
    whose ``class`` is ``'CARDINAL'`` using a pretrained seq2seq attention model.

    The model and both vocabularies are loaded in ``__init__`` from the
    filename-encoded hyperparameters of *modelpath*.
    """

    def __init__(self, modelpath, input_max_len=INPUT_MAX_LEN, output_max_len=OUTPUT_MAX_LEN):
        self.modelpath = modelpath
        parts = os.path.basename(self.modelpath).split('_')
        self.model_name = parts[0]
        self.x_max_len = input_max_len
        self.y_max_len = output_max_len
        # Index→word lookup tables persisted alongside the weights.
        self.x_ix_to_word = load_index(f'{self.model_name}_input_index.csv')
        self.y_ix_to_word = load_index(f'{self.model_name}_output_index.csv')
        self.embedding_dim = int(parts[4])
        self.hidden_dim = int(parts[5])
        self.layer_num = int(parts[6])
        self.dropout = float(parts[7])
        # Last filename field = learning rate + 5-char extension (sliced off).
        self.learning_rate = float(parts[8][0:-5])
        self.model = AttentionModel(self.model_name, self.x_max_len,
                                    len(self.x_ix_to_word), self.y_max_len,
                                    len(self.y_ix_to_word), self.hidden_dim,
                                    self.layer_num, self.learning_rate,
                                    self.dropout, self.embedding_dim)
        self.model.load_weights(self.modelpath)

    def fit(self, X: pd.DataFrame, y=None, *args, **kwargs):
        """No-op: the underlying model is already trained."""
        return self

    def transform(self, df: pd.DataFrame, y=None, *args, **kwargs):
        """Return *df* with predicted 'after' values for CARDINAL rows.

        Existing non-null 'after' values are kept; predictions only fill
        the gaps (``combine_first``). The input frame is not mutated.
        """
        rows = df[df['class'] == 'CARDINAL'].index
        # Model input: two tokens of left context, the character-spaced
        # 'before' string, then two tokens of right context — lowercased.
        spaced_before = df.loc[rows, 'before'].map(lambda s: ' '.join(list(s)))
        feature_text = (df.loc[rows, 'prev_prev'].map(str) + ' '
                        + df.loc[rows, 'prev'].map(str) + ' '
                        + spaced_before + ' '
                        + df.loc[rows, 'next'].map(str) + ' '
                        + df.loc[rows, 'next_next'].map(str)).str.lower()
        x, _, _ = prepare_matrix(feature_text, self.x_max_len,
                                 len(self.x_ix_to_word),
                                 f'{self.model_name}_input_index.csv')
        del feature_text  # free before inference to cap peak memory
        predictions = words_list(self.model.test(x), self.y_ix_to_word)
        del x
        predicted = pd.Series(predictions, index=rows, name='after')
        if 'after' not in df.columns:
            return df.assign(after=predicted)
        return df.assign(after=df['after'].combine_first(predicted))
def _print_sparse_stats(label, matrix):
    """Print dtype, shape, density and approximate memory use of a sparse matrix.

    *matrix* is assumed to be a scipy sparse matrix exposing ``dtype``,
    ``shape`` and ``nnz`` — TODO confirm against prepare_matrix's return type.
    """
    print(f'{label} type={matrix.dtype}, '
          f'size={matrix.shape}, '
          f'density={matrix.nnz / matrix.shape[0] / matrix.shape[1]},'
          f'{sparse_memory_usage(matrix):9.3} Mb')


def train(model_name, df: pd.DataFrame, input_max_len, input_vocab_len,
          output_max_len, output_vocab_len, hidden_dim, layer_num,
          learning_rate, dropout, embedding_dim, epochs, mem_size, batch_size):
    """Train an AttentionModel on before→after pairs and report accuracy.

    Vectorizes ``df['before']``/``df['after']`` via ``prepare_matrix``,
    holds out 4% of rows for testing, trains, then prints sequence-level
    accuracy on both the raw index arrays and the decoded strings.

    Returns:
        pd.DataFrame with columns 'before', 'actual', 'predict' for the
        held-out test split.
    """
    X, X_ix_to_word, _ = prepare_matrix(df['before'], input_max_len,
                                        input_vocab_len,
                                        f'{model_name}_input_index.csv')
    y, y_ix_to_word, _ = prepare_matrix(df['after'], output_max_len,
                                        output_vocab_len,
                                        f'{model_name}_output_index.csv')
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.04)
    _print_sparse_stats('x train', X_train)
    _print_sparse_stats('y train', y_train)
    _print_sparse_stats('x test', X_test)
    _print_sparse_stats('y test', y_test)
    # Release the full matrices before model construction to cap peak memory.
    del X, y
    gc.collect()
    model = AttentionModel(model_name, input_max_len, len(X_ix_to_word),
                           output_max_len, len(y_ix_to_word), hidden_dim,
                           layer_num, learning_rate, dropout, embedding_dim)
    model.train(X_train, y_train, X_test, y_test, epochs, mem_size, batch_size)
    y_predict = model.test(X_test)
    # A sequence counts as correct only if every position matches.
    print('array acc', np.mean(np.all(y_predict == y_test, axis=1)))
    y_predict_str = words_list(y_predict, y_ix_to_word)
    X_str = words_list(X_test.toarray(), X_ix_to_word)
    y_str = words_list(y_test.toarray(), y_ix_to_word)
    result_df = pd.DataFrame(data={'before': X_str,
                                   'actual': y_str,
                                   'predict': y_predict_str})
    print('str acc',
          len(result_df[result_df['actual'] == result_df['predict']]) / len(result_df))
    return result_df