def greek_to_token(wrd, maxlen=20):
    """One-hot encode a Greek word as a (1, maxlen, alpha) array for the model.

    The word is normalized via ``basify`` and ``clean``, split into individual
    characters joined by spaces, tokenized with the module-level ``tok``
    tokenizer, padded/truncated to ``maxlen`` characters, and one-hot encoded
    over ``alpha`` classes (the module-level alphabet size).

    Parameters
    ----------
    wrd : str
        Raw Greek word to encode.
    maxlen : int, optional
        Fixed character-sequence length (default 20, matching the trained
        model's input shape).

    Returns
    -------
    numpy.ndarray of shape (1, maxlen, alpha).
    """
    # Space-separated characters, e.g. "abc" -> "a b c", so the tokenizer
    # treats each character as a token.  (Renamed from `input`, which
    # shadowed the builtin.)
    char_seq = " ".join(list(clean(basify(wrd))))
    sequences = tok.texts_to_sequences([char_seq])
    sequences_matrix = sequence.pad_sequences(sequences, maxlen=maxlen)
    # One batch of one word: (1, maxlen, alpha) one-hot tensor.
    out = to_categorical(sequences_matrix[0], num_classes=alpha).reshape(1, maxlen, alpha)
    return out
def get_greek(self):
    """Return the cleaned, lower-cased Greek form of this entry.

    Takes the portion of ``self.word_string`` before the first ``"/"``,
    lower-cases it, and passes it through the module-level ``clean`` helper.
    """
    headword = self.word_string.split("/")[0].lower()
    return clean(headword)
# --- Train the POS model and persist model + supporting artifacts ---
import pickle  # hoisted from mid-script to the top of this section

# Train the character-level POS model on one-hot encoded sequences.
model.compile(
    loss='categorical_crossentropy',
    optimizer=RMSprop(),
    metrics=['categorical_accuracy'],
)
model.fit(
    to_categorical(sequences_matrix),
    encoded,
    batch_size=128,
    epochs=5,
    validation_split=0.1,
)
model.save('/home/q078011/external/greek_dev/pos_mini.h5')

# NOTE(review): this literal appears mojibake-damaged (Greek text rendered as
# '?' characters); confirm the source file's original encoding and restore the
# intended Greek string before relying on `to_pos`.
st = '???t? d? ???? ?????e?, ??a p?????? t? ????? ?p? t?? ?????? d?? t?? p??f?t??, ?????t??'
to_pos = clean(basify(st))

# Train an NLTK TnT tagger on the (token, tag) pairs built from all_str / y.
tnt_tot = tnt.TnT()
tnt_tot.train([list(zip(list(all_str), list(y)))])

# Persist the letter dictionary and tokenizer so inference can reuse them.
with open('/home/q078011/external/greek_dev/dict_letters.pkl', 'wb') as f:
    pickle.dump(dict_g, f)
with open('/home/q078011/external/greek_dev/tokenizer.pkl', 'wb') as g:
    pickle.dump(tok, g)