def main(args):
    """Evaluate a trained character-level tweet classifier on labelled data.

    args: [data_path, model_path, save_path, (optional) model_number]
      data_path  -- TSV file, one example per line:
                    <comma-separated labels>\t<tweet text>
      model_path -- directory with model_<n>.npz / best_model.npz plus the
                    dict.pkl and label_dict.pkl vocabularies
      save_path  -- directory that receives data.pkl, predictions.npy,
                    embeddings.npy and targets.pkl

    Side effects: reads the model/dictionaries from disk, compiles two
    Theano functions, and writes four result files into save_path.
    """
    data_path = args[0]
    model_path = args[1]
    save_path = args[2]
    if len(args) > 3:
        m_num = int(args[3])

    print("Preparing Data...")
    # Test data: collect tweets and their (possibly multiple) labels.
    Xt = []
    yt = []
    with io.open(data_path, "r", encoding="utf-8") as f:
        for line in f:
            (yc, Xc) = line.rstrip("\n").split("\t")
            Xt.append(Xc)
            yt.append(yc.split(","))

    # Model: a numbered snapshot if requested, otherwise the best checkpoint.
    print("Loading model params...")
    if len(args) > 3:
        # FIX: these two messages used Python 2 `print` statements, which is
        # a SyntaxError under Python 3 (the rest of the code uses print()).
        print("Loading %s/model_%d.npz" % (model_path, m_num))
        params = load_params("%s/model_%d.npz" % (model_path, m_num))
    else:
        print("Loading %s/best_model.npz" % model_path)
        params = load_params("%s/best_model.npz" % model_path)

    print("Loading dictionaries...")
    with open("%s/dict.pkl" % model_path, "rb") as f:
        chardict = pkl.load(f)
    with open("%s/label_dict.pkl" % model_path, "rb") as f:
        labeldict = pkl.load(f)
    # +1 presumably reserves index 0 (padding/unknown) -- capped at the
    # configured maxima; TODO confirm against the training code.
    n_char = min(len(chardict.keys()) + 1, N_WORD)
    n_classes = min(len(labeldict.keys()) + 1, MAX_CLASSES)

    # iterators
    test_iter = batch.BatchTweets(Xt, yt, labeldict, batch_size=N_BATCH,
                                  max_classes=MAX_CLASSES, test=True)

    print("Building network...")
    # Tweet variables
    tweet = T.itensor3()
    targets = T.imatrix()
    # masks
    t_mask = T.fmatrix()
    # network for prediction
    predictions, embeddings = classify(tweet, t_mask, params, n_classes, n_char)

    # Theano functions
    print("Compiling theano functions...")
    predict = theano.function([tweet, t_mask], predictions)
    encode = theano.function([tweet, t_mask], embeddings)

    # Test
    print("Testing...")
    out_data = []
    out_pred = []
    out_emb = []
    out_target = []
    for xr, y in test_iter:
        x, x_m = batch.prepare_data(xr, chardict, n_tokens=n_char)
        p = predict(x, x_m)
        e = encode(x, x_m)
        # Class indices sorted by descending score, per example.
        ranks = np.argsort(p)[:, ::-1]
        for idx, item in enumerate(xr):
            out_data.append(item)
            out_pred.append(ranks[idx, :])
            out_emb.append(e[idx, :])
            out_target.append(y[idx])

    # Save
    print("Saving...")
    # FIX: pickle.dump and np.save write bytes, so the files must be opened
    # in binary mode ("wb"); text mode ("w") fails/corrupts under Python 3.
    with open("%s/data.pkl" % save_path, "wb") as f:
        pkl.dump(out_data, f)
    with open("%s/predictions.npy" % save_path, "wb") as f:
        np.save(f, np.asarray(out_pred))
    with open("%s/embeddings.npy" % save_path, "wb") as f:
        np.save(f, np.asarray(out_emb))
    with open("%s/targets.pkl" % save_path, "wb") as f:
        pkl.dump(out_target, f)
def main(args):
    """Predict top-5 hashtags and dump embeddings for unlabelled tweets.

    args: [data_path, model_path, save_path, (optional) model_number]
      data_path  -- UTF-8 text file, one raw tweet per line (no labels)
      model_path -- directory with model_<n>.npz / best_model.npz plus the
                    dict.pkl and label_dict.pkl vocabularies
      save_path  -- directory that receives predicted_tags.txt (five
                    space-separated tags per tweet) and embeddings.npy

    Side effects: loads the model from disk, compiles two Theano functions,
    and writes two result files into save_path.
    """
    data_path = args[0]
    model_path = args[1]
    save_path = args[2]
    if len(args) > 3:
        m_num = int(args[3])

    print("Preparing Data...")
    # Test data: one tweet per line, no labels.
    Xt = []
    with io.open(data_path, 'r', encoding='utf-8') as f:
        for line in f:
            Xc = line.rstrip('\n')
            Xt.append(Xc)

    # Model: a numbered snapshot if requested, otherwise the best checkpoint.
    print("Loading model params...")
    if len(args) > 3:
        params = load_params('%s/model_%d.npz' % (model_path, m_num))
    else:
        params = load_params('%s/best_model.npz' % model_path)

    print("Loading dictionaries...")
    with open('%s/dict.pkl' % model_path, 'rb') as f:
        chardict = pkl.load(f)
    with open('%s/label_dict.pkl' % model_path, 'rb') as f:
        labeldict = pkl.load(f)
    # +1 presumably reserves index 0 (padding/unknown) -- TODO confirm.
    n_char = min(len(chardict.keys()) + 1, N_WORD)
    n_classes = min(len(labeldict.keys()) + 1, MAX_CLASSES)
    # Map class indices back to human-readable tag strings.
    inverse_labeldict = invert(labeldict)

    print("Building network...")
    # Tweet variables
    tweet = T.itensor3()
    t_mask = T.fmatrix()
    # network for prediction
    predictions, embeddings = classify(tweet, t_mask, params, n_classes, n_char)

    # Theano functions
    print("Compiling theano functions...")
    predict = theano.function([tweet, t_mask], predictions)
    encode = theano.function([tweet, t_mask], embeddings)

    # Test
    print("Encoding...")
    out_pred = []
    out_emb = []
    # FIX: integer division (//) -- under Python 3 the original `/` yields a
    # float and range() raises TypeError.
    numbatches = len(Xt) // N_BATCH + 1
    for i in range(numbatches):
        xr = Xt[N_BATCH * i:N_BATCH * (i + 1)]
        # FIX: when len(Xt) is an exact multiple of N_BATCH the last slice is
        # empty; skip it instead of feeding an empty batch to the model.
        if not xr:
            break
        x, x_m = batch.prepare_data(xr, chardict, n_tokens=n_char)
        p = predict(x, x_m)
        e = encode(x, x_m)
        # Class indices sorted by descending score, per example.
        ranks = np.argsort(p)[:, ::-1]
        for idx, item in enumerate(xr):
            # Keep the five highest-scoring tags as one space-joined line.
            out_pred.append(' '.join(
                [inverse_labeldict[r] for r in ranks[idx, :5]]))
            out_emb.append(e[idx, :])

    # Save
    print("Saving...")
    # FIX: write the tags with an explicit encoding (matching the UTF-8
    # input) instead of relying on the platform locale.
    with io.open('%s/predicted_tags.txt' % save_path, 'w',
                 encoding='utf-8') as f:
        for item in out_pred:
            f.write(item + '\n')
    # FIX: np.save writes bytes; the file must be opened in binary mode.
    with open('%s/embeddings.npy' % save_path, 'wb') as f:
        np.save(f, np.asarray(out_emb))
def main(args):
    """Predict top-5 hashtags and dump embeddings for unlabelled tweets.

    args: [data_path, model_path, save_path, (optional) model_number]
      data_path  -- UTF-8 text file, one raw tweet per line (no labels)
      model_path -- directory with model_<n>.npz / best_model.npz plus the
                    dict.pkl and label_dict.pkl vocabularies
      save_path  -- directory that receives predicted_tags.txt (five
                    space-separated tags per tweet) and embeddings.npy

    Side effects: loads the model from disk, compiles two Theano functions,
    and writes two result files into save_path.
    """
    data_path = args[0]
    model_path = args[1]
    save_path = args[2]
    if len(args) > 3:
        m_num = int(args[3])

    print("Preparing Data...")
    # Test data: one tweet per line, no labels.
    Xt = []
    with io.open(data_path, 'r', encoding='utf-8') as f:
        for line in f:
            Xc = line.rstrip('\n')
            Xt.append(Xc)

    # Model: a numbered snapshot if requested, otherwise the best checkpoint.
    print("Loading model params...")
    if len(args) > 3:
        params = load_params('%s/model_%d.npz' % (model_path, m_num))
    else:
        params = load_params('%s/best_model.npz' % model_path)

    print("Loading dictionaries...")
    with open('%s/dict.pkl' % model_path, 'rb') as f:
        chardict = pkl.load(f)
    with open('%s/label_dict.pkl' % model_path, 'rb') as f:
        labeldict = pkl.load(f)
    # +1 presumably reserves index 0 (padding/unknown) -- TODO confirm.
    n_char = min(len(chardict.keys()) + 1, N_WORD)
    n_classes = min(len(labeldict.keys()) + 1, MAX_CLASSES)
    # Map class indices back to human-readable tag strings.
    inverse_labeldict = invert(labeldict)

    print("Building network...")
    # Tweet variables
    tweet = T.itensor3()
    t_mask = T.fmatrix()
    # network for prediction
    predictions, embeddings = classify(tweet, t_mask, params, n_classes, n_char)

    # Theano functions
    print("Compiling theano functions...")
    predict = theano.function([tweet, t_mask], predictions)
    encode = theano.function([tweet, t_mask], embeddings)

    # Test
    print("Encoding...")
    out_pred = []
    out_emb = []
    # FIX: integer division (//) -- under Python 3 the original `/` yields a
    # float and range() raises TypeError.
    numbatches = len(Xt) // N_BATCH + 1
    for i in range(numbatches):
        xr = Xt[N_BATCH * i:N_BATCH * (i + 1)]
        # FIX: when len(Xt) is an exact multiple of N_BATCH the last slice is
        # empty; skip it instead of feeding an empty batch to the model.
        if not xr:
            break
        x, x_m = batch.prepare_data(xr, chardict, n_tokens=n_char)
        p = predict(x, x_m)
        e = encode(x, x_m)
        # Class indices sorted by descending score, per example.
        ranks = np.argsort(p)[:, ::-1]
        for idx, item in enumerate(xr):
            # Keep the five highest-scoring tags as one space-joined line.
            out_pred.append(' '.join(
                [inverse_labeldict[r] for r in ranks[idx, :5]]))
            out_emb.append(e[idx, :])

    # Save
    print("Saving...")
    # FIX: write the tags with an explicit encoding (matching the UTF-8
    # input) instead of relying on the platform locale.
    with io.open('%s/predicted_tags.txt' % save_path, 'w',
                 encoding='utf-8') as f:
        for item in out_pred:
            f.write(item + '\n')
    # FIX: np.save writes bytes; the file must be opened in binary mode.
    with open('%s/embeddings.npy' % save_path, 'wb') as f:
        np.save(f, np.asarray(out_emb))
def main(args):
    """Evaluate a trained character-level tweet classifier on labelled data.

    args: [data_path, model_path, save_path, (optional) model_number]
      data_path  -- TSV file, one example per line:
                    <comma-separated labels>\t<tweet text>
      model_path -- directory with model_<n>.npz / best_model.npz plus the
                    dict.pkl and label_dict.pkl vocabularies
      save_path  -- directory that receives data.pkl, predictions.npy,
                    embeddings.npy and targets.pkl

    Side effects: reads the model/dictionaries from disk, compiles two
    Theano functions, and writes four result files into save_path.
    """
    data_path = args[0]
    model_path = args[1]
    save_path = args[2]
    if len(args) > 3:
        m_num = int(args[3])

    print("Preparing Data...")
    # Test data: collect tweets and their (possibly multiple) labels.
    Xt = []
    yt = []
    with io.open(data_path, 'r', encoding='utf-8') as f:
        for line in f:
            (yc, Xc) = line.rstrip('\n').split('\t')
            Xt.append(Xc)
            yt.append(yc.split(','))

    # Model: a numbered snapshot if requested, otherwise the best checkpoint.
    print("Loading model params...")
    if len(args) > 3:
        # FIX: these two messages used Python 2 `print` statements, which is
        # a SyntaxError under Python 3 (the rest of the code uses print()).
        print('Loading %s/model_%d.npz' % (model_path, m_num))
        params = load_params('%s/model_%d.npz' % (model_path, m_num))
    else:
        print('Loading %s/best_model.npz' % model_path)
        params = load_params('%s/best_model.npz' % model_path)

    print("Loading dictionaries...")
    with open('%s/dict.pkl' % model_path, 'rb') as f:
        chardict = pkl.load(f)
    with open('%s/label_dict.pkl' % model_path, 'rb') as f:
        labeldict = pkl.load(f)
    # +1 presumably reserves index 0 (padding/unknown) -- capped at the
    # configured maxima; TODO confirm against the training code.
    n_char = min(len(chardict.keys()) + 1, N_WORD)
    n_classes = min(len(labeldict.keys()) + 1, MAX_CLASSES)

    # iterators
    test_iter = batch.BatchTweets(Xt, yt, labeldict, batch_size=N_BATCH,
                                  max_classes=MAX_CLASSES, test=True)

    print("Building network...")
    # Tweet variables
    tweet = T.itensor3()
    targets = T.imatrix()
    # masks
    t_mask = T.fmatrix()
    # network for prediction
    predictions, embeddings = classify(tweet, t_mask, params, n_classes, n_char)

    # Theano functions
    print("Compiling theano functions...")
    predict = theano.function([tweet, t_mask], predictions)
    encode = theano.function([tweet, t_mask], embeddings)

    # Test
    print("Testing...")
    out_data = []
    out_pred = []
    out_emb = []
    out_target = []
    for xr, y in test_iter:
        x, x_m = batch.prepare_data(xr, chardict, n_tokens=n_char)
        p = predict(x, x_m)
        e = encode(x, x_m)
        # Class indices sorted by descending score, per example.
        ranks = np.argsort(p)[:, ::-1]
        for idx, item in enumerate(xr):
            out_data.append(item)
            out_pred.append(ranks[idx, :])
            out_emb.append(e[idx, :])
            out_target.append(y[idx])

    # Save
    print("Saving...")
    # FIX: pickle.dump and np.save write bytes, so the files must be opened
    # in binary mode ('wb'); text mode ('w') fails/corrupts under Python 3.
    with open('%s/data.pkl' % save_path, 'wb') as f:
        pkl.dump(out_data, f)
    with open('%s/predictions.npy' % save_path, 'wb') as f:
        np.save(f, np.asarray(out_pred))
    with open('%s/embeddings.npy' % save_path, 'wb') as f:
        np.save(f, np.asarray(out_emb))
    with open('%s/targets.pkl' % save_path, 'wb') as f:
        pkl.dump(out_target, f)