#!/usr/bin/env python
"""Score the prediction set with a saved TextCnn model and split the queries.

Usage: run with one positional argument, the path of the configuration file
handed to ``DataLoader.build``.

Steps performed:
  1. Load the dictionary from ``data/title_dict.json`` and build the loader
     from the configuration file.
  2. Build a ``TextCnn`` from the loader's dimensions and load the saved
     weights from ``model/weights.010-0.9952.hdf5``.
  3. Predict on ``loader.get_predict()``.
  4. For every prediction row ``i``: if ``result[i][1] > 0.5`` the i-th line
     of the raw query file is printed to stdout, otherwise it is written to
     the file ``neg`` in the current working directory.

NOTE(review): column 1 of the prediction is presumably the positive-class
score, and the raw query file is assumed to be row-aligned with the data
returned by ``loader.get_predict()`` -- confirm against DataLoader/TextCnn.
"""
import sys
import os
import numpy as np

# Make the project's script directory importable before the local imports.
sys.path.append("./scripts/")
from scripts.classification.textcnn import TextCnn
from data.data_loader import DataLoader

# The only command-line argument: configuration file for the data loader.
conf_file = sys.argv[1]

loader = DataLoader()
loader.load_dict("data/title_dict.json")
loader.build(conf_file)
data = loader.get_predict()

# Model hyper-parameters are derived from what the loader built.
conf2 = {
    "embedding_size": loader.word_vec_len,
    "vocab_size": len(loader.weights),
    "sequence_len": loader.max_len,
    "epochs": 10,
}

model = TextCnn(conf2)
model.load_model("model/weights.010-0.9952.hdf5")
result = model.predict(data)

# Both files are managed by a single `with` so they are closed (and the
# output flushed) even if the loop below raises. They are opened in the same
# order as before: input first, then the output file.
with open("/mnt/hgfs/share/pornCensor/query.sug.title/query.random.5w", "r") as f, \
        open("neg", "w") as fw:
    lines = f.readlines()
    # Index-based on purpose: a prediction row without a matching input line
    # raises IndexError instead of being silently dropped.
    for i in range(result.shape[0]):
        if result[i][1] > 0.5:
            # lines[i] keeps its trailing newline, so print() emits a blank
            # line after each query; kept as-is to preserve the output format.
            print(lines[i])
        else:
            fw.write(lines[i])
type=str, default="conf/query_conf", help="conf_file containes sample files and labels") parser.add_argument("--w2v_path", type=str, default="/mnt/hgfs/share/pornCensor/query.skip.vec.win3", help="w2v file which provide w2v") FLAGS, unparsed = parser.parse_known_args() print("unparsed: ", unparsed) params = {"ratio": 0.2, "max_len": 15, "embedding_size": 100} loader = DataLoader() loader.set_params(params) loader.set_w2v(FLAGS.w2v_path) loader.build(FLAGS.conf_file) #loader.save_dict("data/title_dict.json") train_data, test_data, train_label, test_label = loader.get_train_test() conf = { "embedding_size": loader.word_vec_len, "vocab_size": len(loader.weights), "sequence_len": loader.max_len, "epochs": 100, "classes": loader.classes } #model = Lr(conf) model = TextCnn(conf) #model = Fasttext(conf) #model = TextRnn(conf) #model = AttentiveTextRnn(conf)