Example #1
import os
import sys

# DataManager, predict, and write are assumed to be defined elsewhere in the project.
def main():
    path_pfx = ''   # prefix for model paths; empty means the current directory
    max_len = 37    # maximum sequence length used for padding

    # Load the unlabelled test set given on the command line.
    dm = DataManager()
    dm.add_data('test', sys.argv[1], False, True)
    print(len(dm.data['test'][0]))
    dm.preprocessing()

    # Use the pretrained word2vec model and convert the text to padded index sequences.
    dm.load_word2vec(os.path.join(path_pfx, 'model/word2vec'))
    #dm.load_tokenizer(os.path.join(path_pfx, 'token.pkl'))
    dm.to_sequence(max_len, use_pretrain=True)

    # Predict labels for the test split and write them to the output file.
    result = predict(dm.data['test'][0], path_pfx)
    write(sys.argv[2], result)
    print('finished')
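Example #1 relies on predict and write helpers that are not part of the snippet. The following is a minimal, hypothetical sketch of what they might look like, assuming a saved Keras model and a two-column CSV output; the model filename ('model.h5'), the 0.5 threshold, and the CSV layout are illustrative guesses, not the project's actual code.

import os
import csv
from tensorflow.keras.models import load_model

def predict(x, path_pfx):
    # Load a previously trained model; the filename is an assumption.
    model = load_model(os.path.join(path_pfx, 'model.h5'))
    probs = model.predict(x)
    # Threshold the sigmoid outputs into binary labels (assumed task format).
    return (probs > 0.5).astype(int).ravel()

def write(path, result):
    # Write the predictions as an id,label CSV (assumed submission format).
    with open(path, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['id', 'label'])
        for i, label in enumerate(result):
            writer.writerow([i, int(label)])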
Example #2
import os
import sys

# DataManager and train are assumed to be defined elsewhere in the project.
def main():
    voc_size = None   # vocabulary size; None lets the model infer it
    max_len = 39      # maximum sequence length used for padding
    path_pfx = ''     # prefix for model paths; empty means the current directory

    # Load the labelled training set given on the command line.
    dm = DataManager()
    dm.add_data('train', sys.argv[1])
    #dm.add_data('semi', os.path.join(path_pfx, 'training_nolabel.txt'), False)
    #dm.add_data('test', os.path.join(path_pfx, 'testing_data.txt'), False, True)
    dm.preprocessing()

    # Use the pretrained word2vec model and convert the text to padded index sequences.
    dm.load_word2vec(os.path.join(path_pfx, 'model/word2vec'))
    #dm.load_embedding_matrix(os.path.join(path_pfx, 'word2vec.wv.vectors.npy'))
    dm.to_sequence(max_len, use_pretrain=True)
    #dm.to_bow()

    print(max_len)

    # No explicit embedding matrix is passed; the model builds its own embedding layer.
    #emb_mat = dm.get_embedding_matrix()
    emb_mat = None

    train(dm, voc_size=voc_size, max_len=max_len, emb_mat=emb_mat)
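Example #2 hands everything to a train helper that is also not shown. The sketch below is one plausible shape for it, assuming dm.data['train'] holds (padded sequences, labels) and a simple Embedding + LSTM binary classifier; the layer sizes, training settings, and save path are assumptions, not the project's actual implementation.

import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense

def train(dm, voc_size=None, max_len=40, emb_mat=None):
    # Assumed layout: dm.data['train'] == [padded_sequences, labels].
    X, Y = dm.data['train']
    if voc_size is None:
        voc_size = int(np.max(X)) + 1   # infer vocabulary size from the indices

    model = Sequential([
        Embedding(voc_size, 128),       # emb_mat could seed these weights if provided
        LSTM(64),
        Dense(1, activation='sigmoid'), # binary sentiment output (assumed task)
    ])
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    model.fit(X, Y, batch_size=128, epochs=5, validation_split=0.1)
    model.save('model.h5')              # assumed save location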