Esempio n. 1
0
def preprocess_data():
    """Tokenize the question/intent corpus held in the module-level `data` frame.

    Maps the 'intent' column through `intent_mapping`, replaces every entry
    of the 'question' column with its tokenized form, and returns the two
    columns as parallel lists.

    Returns:
        dict: {'encode': list of tokenized questions,
               'decode': list of mapped intents}

    NOTE(review): relies on module globals `data` (a pandas DataFrame with
    columns 'question' and 'intent', in that order), `intent_mapping`, and
    `tokenize` — confirm against the enclosing module.
    """
    data['intent'] = data['intent'].map(intent_mapping)

    # Tokenize each question and write the column back in one assignment.
    # The original called data.replace(q, tokenize(q), regex=True) per row,
    # which (a) treated the raw question text as a regex pattern — any
    # question containing '?', '.', '(', etc. would raise or mis-replace,
    # (b) mutated the frame while iterating it, and (c) rescanned the whole
    # frame on every row (O(n^2)).
    tokenized = []
    for count, question in enumerate(data['question']):
        tokenized.append(tokenize(question))
        if count % 50 == 0:  # lightweight progress indicator
            print("CURRENT COLLECT : ", count)
    data['question'] = tokenized

    # Row order in .values follows column order: (question, intent).
    encode = [q for q, _ in data.values]
    decode = [i for _, i in data.values]

    return {'encode': encode, 'decode': decode}
Esempio n. 2
0
def server_tokenize(text):
    """Tokenize *text* on behalf of a server request.

    Thin wrapper that delegates directly to the module-level `tokenize`.
    """
    tokens = tokenize(text)
    return tokens
Esempio n. 3
0
def pre_process(speech):
    """Normalize *speech*: fix it, tokenize the result, then fix once more.

    Pipeline equivalent of fix(tokenize(fix(speech))), spelled out step by
    step for readability.
    """
    cleaned = fix(speech)
    tokens = tokenize(cleaned)
    return fix(tokens)