# Any results you write to the current directory are saved as output.

from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.layers import Dense, Input, LSTM, Embedding, Dropout, Activation, Conv1D
from keras.layers import Bidirectional, GlobalMaxPool1D, MaxPooling1D, Add, Flatten
from keras.layers import GlobalAveragePooling1D, GlobalMaxPooling1D, concatenate, SpatialDropout1D
from keras.models import Model, load_model
from keras import initializers, regularizers, constraints, optimizers, layers, callbacks
from keras import backend as K
from keras.engine import InputSpec, Layer
from keras.optimizers import Adam, RMSprop
from keras.callbacks import EarlyStopping, ModelCheckpoint, LearningRateScheduler
from keras.layers import GRU, BatchNormalization

# Project helpers referenced below (data_util.load_dataset, max_len)
from utils import data_util
from utils.data_util import max_len

maxlen = max_len()      # maximum sequence length used when padding
batch_size = 256
dropout_rate = 0.35
recurrent_units = 128
dense_size = 6          # one output unit per target class
fold_count = 3

PROBABILITIES_NORMALIZE_COEFFICIENT = 1.4

list_classes = [
    "toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"
]
x_train, y_train, x_test = data_util.load_dataset()


def get_model(embedding_matrix,
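# The snippet above is cut off at the get_model signature. The following is a
# minimal sketch, not the original author's implementation, of how such a
# model builder could look, assuming a bidirectional GRU encoder with pooled
# outputs; the name get_model_sketch and every argument after embedding_matrix
# are hypothetical.
def get_model_sketch(embedding_matrix,
                     sequence_length=maxlen,
                     dropout=dropout_rate,
                     units=recurrent_units,
                     n_classes=dense_size):
    inp = Input(shape=(sequence_length,))
    # Frozen embedding layer initialised from the pre-trained matrix
    x = Embedding(embedding_matrix.shape[0],
                  embedding_matrix.shape[1],
                  weights=[embedding_matrix],
                  trainable=False)(inp)
    x = SpatialDropout1D(dropout)(x)
    x = Bidirectional(GRU(units, return_sequences=True))(x)
    # Combine average- and max-pooled sequence representations
    x = concatenate([GlobalAveragePooling1D()(x), GlobalMaxPooling1D()(x)])
    # One sigmoid output per label for multi-label classification
    outp = Dense(n_classes, activation="sigmoid")(x)
    model = Model(inputs=inp, outputs=outp)
    model.compile(loss="binary_crossentropy",
                  optimizer=Adam(),
                  metrics=["accuracy"])
    return model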
Example #2
import pandas as pd

from conf.configure import Configure as conf

from utils import data_util
from utils.nltk_utils import tokenize_sentences
from utils.clean_util import TextCleaner
from utils.embedding_utils import read_embedding_list, clear_embedding_list, convert_tokens_to_ids
from utils.data_util import max_len

from utils.extra_utils import load_data, Embeds, Logger
from nltk.tokenize import RegexpTokenizer
from preprocessing import clean_text, convert_text2seq, get_embedding_matrix, split_data

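# Special placeholder tokens used when turning comments into token sequences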
UNKNOWN_WORD = "_UNK_"
END_WORD = "_END_"
NAN_WORD = "_NAN_"
sentences_length = max_len()
remove_stop_words = True
stem_words = False  # set to True to enable stemming
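# External word lists, presumably consumed by the text-cleaning utilities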
swear_words_fname = '../input/swear_words.csv'
wrong_words_fname = '../input/correct_words.csv'

CLASSES = [
    "toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"
]


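# Entry point: loads the raw train/test CSVs referenced by the project configuration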
def main():

    print("Loading data...")
    train_data = pd.read_csv(conf.train_data_path)
    test_data = pd.read_csv(conf.x_test_data_path)