def get_trans():
    """Load both models and return their transition tables.

    Returns:
        A 6-tuple ``(ngram_trans, nn_trans, ngram, nn, configs, data)``:
        ``ngram_trans`` is ``ngram.ngram``; ``nn_trans`` is ``np.exp`` of the
        ``'1'`` entry of ``nn.predict_identity()``; ``ngram`` and ``nn`` are
        the models returned by ``get_models()``; ``configs`` and ``data`` are
        whatever ``get_configs()`` and ``get_data()`` return.
    """
    configs = get_configs()
    data = get_data()
    # NOTE(review): get_models() is called without arguments, so it loads its
    # own configs/data instead of reusing the two objects created above.
    nn, ngram = get_models()

    # if bigram nn, it's from t to t-1, then need to transpose?
    identity_preds = nn.predict_identity()

    # Debug output only: inspect whichever entry the dict lists first.
    # (Single parenthesised argument, so this prints the same text as the
    # equivalent Python 2 print statement.)
    first_entry = identity_preds.values()[0]
    print("%s %s %s" % ('...nn_trans type', type(first_entry),
                        first_entry.shape))

    # The entry stored under the key '1' is the one actually returned.
    selected = identity_preds['1']
    print(identity_preds.keys())
    print("%s %s %s" % ('...nn_trans type', type(selected), selected.shape))

    # presumably the stored values are log-probabilities -- TODO confirm
    nn_trans = np.exp(selected)
    ngram_trans = ngram.ngram
    return ngram_trans, nn_trans, ngram, nn, configs, data
def get_models(data=None, configs=None, save=False):
    """Build the skip-gram network and the bigram model.

    Args:
        data: Dataset object. When ``None`` it is loaded with
            ``get_data(configs)``. If ``configs["bigram"]`` is truthy its
            ``keys`` attribute is overwritten in place.
        configs: Configuration object, indexed like a mapping and also read
            through ``configs.name``. When ``None`` it is loaded with
            ``get_configs()``.
        save: When true, the trained network, its weights and the bigram
            model are written to pickle files in the working directory.

    Returns:
        ``(model, ngram_model)`` -- the ``SkipGramNN`` and ``NGram`` instances.

    Raises:
        IOError: ``configs["retrieve_model"]`` is truthy but the stored model
            file does not exist.
        AssertionError: the re-checked loss differs from the training loss
            (only checked when ``configs["regularize"]`` is falsy), or the two
            models end up with different symbol lists.

    Side effects:
        Prints progress to stdout. When training, clears the current
        matplotlib figure and writes ``losses-<configs.name>.png``.
    """
    if configs is None:
        configs = get_configs()
    if data is None:
        data = get_data(configs)

    # TODO: remove hack
    if configs["bigram"]:
        reduced_keys = [configs["reduced_key"]]
        data.keys = reduced_keys
        test_data = data.get_test_data()
        test_data.keys = reduced_keys

    retrieve_model = configs["retrieve_model"]
    model = SkipGramNN(data, configs)
    # Prints use a single parenthesised argument so the output matches the
    # Python 2 print statement this file is written for.
    print("%s %s" % ("SkipGramNN, # of syms", len(model.syms)))

    if not retrieve_model:
        model_loss = model.train()
        if save:
            model.save("skipgram-%s.pkl" % (configs["corpus"]))

        plt.clf()
        plt.plot(model.loss_curve)
        plt.savefig("losses-%s.png" % configs.name)

        print("=== train loss ===")
        print("loss: %.2f" % model_loss)
        loss = model.check_loss()
        if not configs["regularize"]:
            # Sanity check: without regularization the re-computed loss
            # should match the loss reported by training.
            assert np.allclose(loss, model_loss)

        if save:
            model_weights = model.weights.value
            fname = "w1-%s.pkl" % configs.name
            print(fname)
            # The dump order below is the load order of the retrieve branch.
            with open(fname, "wb") as p:
                pickle.dump(model.W1.value, p)
                pickle.dump(data.syms, p)
                pickle.dump(model_loss, p)
                pickle.dump(model_weights, p)
                pickle.dump(configs, p)
            # NOTE(review): hard-coded name; the model was already saved as
            # "skipgram-<corpus>.pkl" above -- confirm both copies are wanted.
            fname = "skipgram-bach.pkl"
            model.save(fname)
    else:
        fname = os.path.join("data", "test_skipgram_model.pkl")
        print(fname)
        # The previous ``assert fname is not None`` could never fail; check
        # for the file itself so a missing model gives a clear error.
        if not os.path.isfile(fname):
            raise IOError(
                "Error: no model to retrieve in the time being: %s" % fname)
        # NOTE: unpickling can execute arbitrary code; only load files that
        # were produced by the save branch of this function.
        with open(fname, "rb") as p:
            pickle.load(p)  # W1 values: unused here, read to advance stream
            pickle.load(p)  # stored symbols: unused, data.syms is used below
            model_loss = pickle.load(p)
            model_weights = pickle.load(p)
            configs_reloaded = pickle.load(p)

        # Report (but do not fail on) differences between the current
        # configuration and the one stored alongside the weights.
        reloaded_keys = configs_reloaded.keys()
        for key in configs.keys():
            if key not in reloaded_keys:
                print("%s %s" % ("no key", key))
        for key in configs.keys():
            if key in reloaded_keys and configs[key] != configs_reloaded[key]:
                print("%s %s" % (configs[key], configs_reloaded[key]))
        # assert configs == configs_reloaded
        model.init_weights(model_weights, model_loss)

    # The training split is not used: it only fed the disabled NGram variant
    # below. The call is kept in case get_train_seqs_data() has side effects
    # -- TODO confirm and remove.
    data.get_train_seqs_data()
    syms = data.syms

    # ngram_model = NGram(train_seqs, syms, 2, configs)
    ngram_model = NGram(data.seqs, syms, 2, configs)
    print("%s %s" % ("\n\nNgram, # of syms", len(ngram_model.syms)))
    if save:
        ngram_model.save("bigram-%s.pkl" % (configs["corpus"]))

    print("%s %s" % (len(ngram_model.syms), len(model.data.syms)))
    # Both models must index symbols identically for their transition
    # tables to be comparable.
    assert ngram_model.syms == model.data.syms

    return model, ngram_model