# --- Tail of a Python 2 evaluation driver (chunk; continues an argparse setup
# begun outside this view). Loads TWO trained Category2Vec models and prepares
# the test corpus for k-NN category prediction. NOTE(review): this fragment
# requires `args.modelfile1`/`args.modelfile2`, while a sibling fragment in this
# file defines only `--modelfile` — confirm which parser actually pairs with it.

# -k / --knn: how many nearest neighbors to use at prediction time.
parser.add_argument("-k", "--knn", dest="knn", type=int,
                    help="use k of the nearest neighbors (default 1)")
args = parser.parse_args()

test_file = args.test   # path to the test corpus
topK = args.knn         # neighbor count for prediction
maxN = args.maxn        # cap on items processed (default appears to be sys.maxint elsewhere)

# Both model files are mandatory for this pairwise evaluation; bail out otherwise.
# (Python 2 print statement; quit(-1) exits with a nonzero status.)
if not args.modelfile1 or not args.modelfile2:
    print "Specify modelfile1 and modelfile2"
    quit(-1)

logging.info("load trained model file")
modelfile1 = args.modelfile1
model1 = Category2Vec.load(modelfile1)  # project type; load() presumably unpickles — verify
modelfile2 = args.modelfile2
model2 = Category2Vec.load(modelfile2)

# Precompute per-pair normalization on each model (project-defined; semantics
# not visible from here).
logging.info("initializing pairnorm")
model1.init_pairnorm()
model2.init_pairnorm()

# Disabled alternative: a joint pair table over both models was considered
# but is commented out.
#pairtable = np.empty((model1.pair_len, model1.layer1_size * 2), dtype=REAL)
#init_joint_pairtable(model1, model2, pairtable)

test_sentences = CatSentence(test_file)  # project corpus reader over the test file
confusion_mtx = {}  # filled later (outside this chunk) with prediction counts

def prepare_sentences():
    """Yield sentence tuples from the test corpus one at a time.

    `count` is initialized here but not used within this visible chunk —
    presumably incremented/logged in code outside this view.
    """
    count = 0
    for sent_tuple in test_sentences:
        yield sent_tuple
# --- Head of the Python 2 evaluation driver (chunk). Sets up logging and the
# command-line parser. The final parser.add_argument call is cut off at the end
# of this chunk and completes outside this view.

import cPickle as pickle  # Python 2 C-accelerated pickle

logger = logging.getLogger("cat_predict_eval")

if __name__ == "__main__":
    logging.basicConfig(
        format=
        '%(asctime)s %(relativeCreated)d : %(threadName)s : %(levelname)s : %(message)s',
        level=logging.INFO)
    logging.info("running %s" % " ".join(sys.argv))
    program = os.path.basename(sys.argv[0])

    # With no arguments, print the module docstring as usage and exit nonzero.
    if len(sys.argv) < 2:
        print(globals()['__doc__'] % locals())
        sys.exit(1)

    # Start from the project-defined base parser and add evaluation options.
    parser = Category2Vec.arg_parser()
    parser.add_argument("--split", dest="split", action="store_true",
                        help="use this option for split training data", default=False)
    parser.add_argument("--modelfile", dest="modelfile", type=str,
                        help="trained model file")
    parser.add_argument("--test", dest="test", type=str, help="test file")
    # Default maxn to "no limit" (Python 2 sys.maxint).
    parser.set_defaults(maxn=sys.maxint)
    parser.add_argument("--maxN", dest="maxn", type=int, help="")
    parser.set_defaults(knn=1)
    # NOTE(review): call truncated at the chunk boundary — the remaining
    # keyword arguments (dest/type/help) appear in a sibling chunk.
    parser.add_argument("-k", "--knn",
# --- Interactive demo chunk (Python 2): download Wikipedia data, train or load
# a Category2Vec model, then answer similarity queries typed on stdin.
# Depends on names defined outside this view: wiki_name, wiki_url, model_dir,
# WikiSentence, Category2Vec, utils. The trailing try: block is cut off at the
# chunk boundary (its except handler is not visible here).

current_dir = os.path.dirname(os.path.realpath(__file__))
wikip_data = current_dir+"/"+wiki_name
c2v_model_name = current_dir+"/"+model_dir+"/"+ wiki_name + "_cat.model"

# Ensure the model directory exists before saving into it.
if not os.path.exists(current_dir+"/"+model_dir):
    os.mkdir(current_dir+"/"+model_dir)

# Download the corpus once; skip if already on disk.
if not os.path.isfile(wikip_data):
    logger.info("downloading Wikipedia data")
    urllib.urlretrieve(wiki_url, wikip_data)  # Python 2 urllib API
    logger.info("downloaded in %s" % wikip_data)

sentences = WikiSentence(wikip_data)  # project corpus reader

# Train a fresh model only when no saved model exists; otherwise reload it.
# Hyperparameters: 20 iterations, continuous-bag model, hierarchical softmax
# on, negative sampling off, 300-dim vectors.
if not os.path.isfile(c2v_model_name):
    model = Category2Vec(sentences,iteration=20, model="cb", hs = 1, negative = 0, size=300)
    model.save(c2v_model_name)
else:
    model = Category2Vec.load(c2v_model_name)

# Interactive query loop: read names from stdin until EOF or "EXIT".
print "Input a category name or an article title (type EXIT to exit)"
sys.stdout.write("Name: ")
line = sys.stdin.readline()
while line:
    line = utils.to_unicode(line.rstrip())
    if line == "EXIT":
        break
    try:
        # Python 2 dict.has_key; cat_no_hash presumably maps category name -> index.
        if model.cat_no_hash.has_key(line):
            cat_no = model.cat_no_hash[line]
            cat_vec = model.cats[cat_no]
            # 11 results so the query itself can be skipped and 10 shown — TODO confirm.
            ncats = model.most_similar_category(cat_vec, 11)
            print "Similar categories similarity"
            print "-"*45
            # NOTE(review): try-block continues past this chunk boundary.
# --- Near-duplicate of the demo chunk above, with normalized spacing.
# NOTE(review): this fragment begins with the dangling continuation of a
# logging.basicConfig(...) call whose opening is outside this view, and it is
# cut off right after an `if ...:` header at the end. Kept verbatim apart from
# reconstructed line breaks; see the sibling chunk for per-step commentary.
        level=logging.INFO)

current_dir = os.path.dirname(os.path.realpath(__file__))
wikip_data = current_dir + "/" + wiki_name
c2v_model_name = current_dir + "/" + model_dir + "/" + wiki_name + "_cat.model"

if not os.path.exists(current_dir + "/" + model_dir):
    os.mkdir(current_dir + "/" + model_dir)

if not os.path.isfile(wikip_data):
    logger.info("downloading Wikipedia data")
    urllib.urlretrieve(wiki_url, wikip_data)  # Python 2 urllib API
    logger.info("downloaded in %s" % wikip_data)

sentences = WikiSentence(wikip_data)

# Train once and cache to disk; reload on subsequent runs.
if not os.path.isfile(c2v_model_name):
    model = Category2Vec(sentences, iteration=20, model="cb", hs=1, negative=0, size=300)
    model.save(c2v_model_name)
else:
    model = Category2Vec.load(c2v_model_name)

print "Input a category name or an article title (type EXIT to exit)"
sys.stdout.write("Name: ")
line = sys.stdin.readline()
while line:
    line = utils.to_unicode(line.rstrip())
    if line == "EXIT":
        break
    try:
        # Chunk ends here; the body of this `if` continues outside this view.
        if model.cat_no_hash.has_key(line):
# --- Variant of the evaluation driver (chunk, Python 2) that loads a SINGLE
# model via --modelfile, unlike the two-model fragment elsewhere in this file.
# Cut off after the model load; evaluation logic continues outside this view.

from Queue import Queue   # Python 2 queue module (unused in this visible chunk)
import numpy as np
import time,re
import cPickle as pickle

logger = logging.getLogger("cat_predict_eval")

if __name__ == "__main__":
    logging.basicConfig(
        format='%(asctime)s %(relativeCreated)d : %(threadName)s : %(levelname)s : %(message)s',
        level=logging.INFO)
    logging.info("running %s" % " ".join(sys.argv))
    program = os.path.basename(sys.argv[0])

    # No arguments: print module docstring as usage, exit nonzero.
    if len(sys.argv) < 2:
        print(globals()['__doc__'] % locals())
        sys.exit(1)

    # Extend the project-defined base parser with evaluation options.
    parser = Category2Vec.arg_parser()
    parser.add_argument("--split", dest="split", action="store_true",
                        help="use this option for split training data", default=False)
    parser.add_argument("--modelfile", dest="modelfile", type=str,
                        help="trained model file")
    parser.add_argument("--test", dest="test", type=str, help="test file")
    parser.set_defaults(maxn=sys.maxint)  # "no limit" sentinel (Python 2)
    parser.add_argument("--maxN", dest="maxn", type=int, help="")
    parser.set_defaults(knn=1)
    parser.add_argument("-k","--knn", dest="knn", type=int,
                        help="use k of the nearest neighbors (default 1)")
    args = parser.parse_args()

    test_file = args.test   # test corpus path
    topK = args.knn         # neighbor count for prediction
    maxN = args.maxn        # item cap

    # NOTE(review): unlike the two-model fragment, a missing --modelfile is
    # silently skipped here rather than rejected — confirm that is intended.
    if args.modelfile:
        logging.info("load trained model file")
        modelfile = args.modelfile
        model = Category2Vec.load(modelfile)