def fit(self):
    """Fitting a model on the training dataset."""
    print("\nTraining started.\n")
    self.model.train()
    self.optimizer = torch.optim.Adam(self.model.parameters(),
                                      lr=self.args.learning_rate,
                                      weight_decay=self.args.weight_decay)
    self.optimizer.zero_grad()
    epoch_range = trange(self.args.epochs, desc="Epoch: ", leave=True)
    for epoch in epoch_range:
        random.shuffle(self.training_graphs)
        batches = create_batches(self.training_graphs, self.args.batch_size)
        self.epoch_loss = 0
        self.nodes_processed = 0
        batch_range = trange(len(batches))
        for batch in batch_range:
            self.epoch_loss = self.epoch_loss + self.process_batch(batches[batch])
            self.nodes_processed = self.nodes_processed + len(batches[batch])
            loss_score = round(self.epoch_loss / self.nodes_processed, 4)
            batch_range.set_description("(Loss=%g)" % loss_score)
        self.update_log()
def process_seed_pages(pages_db, redirects_lookup, seed_pages, depth=1, limit=10000):
    '''Get the mentions in each of the seed pages as well as the pages they link to.
    Set `depth` > 1 to also process the pages that those pages link to.'''
    processed_pages = _process_pages(redirects_lookup, seed_pages,
                                     is_seed_page=True, limit=limit)
    latest_processed_pages = processed_pages
    visited_page_titles = set([
        processed_page['document_info']['title']
        for processed_page in processed_pages
    ])
    for layer in range(depth):
        print("Getting referenced pages")
        pages_referenced = get_outlinks(latest_processed_pages)
        page_titles_to_fetch = pages_referenced - visited_page_titles
        batch_size = 1000
        print("Fetching and processing", len(page_titles_to_fetch),
              "pages in", batch_size, "batches")
        for batch_num, titles_batch in progressbar(
                enumerate(u.create_batches(list(page_titles_to_fetch),
                                           batch_size=batch_size)),
                max_value=int(len(page_titles_to_fetch) / batch_size)):
            batch_pages_to_process = _fetch_pages(pages_db, titles_batch)
            latest_processed_pages = _process_pages(redirects_lookup,
                                                    batch_pages_to_process)
            processed_pages += latest_processed_pages
        visited_page_titles = visited_page_titles.union(pages_referenced)
    return processed_pages
def main():
    batch_size = 10
    ext_emb_path = config.ext_emb_path
    input_x, input_y = loader.prepare_input(config.datadir + config.train)
    emb_layer = pretrain.Embedding(ext_emb_path)
    seqlen, input_x = utils.convert_to_id(input_x, emb_layer.word_to_id)
    input_y, tag_to_id = utils.convert_tag_to_id(input_y)
    seqlen, inp = utils.create_batches(input_x, input_y, seqlen, batch_size)
    sess = tf.Session()
    graph = loader.reload_smodel(sess)
    num_labels = len(tag_to_id)
    source_lstm = SourceLSTM()
    target_lstm = TargetLSTM()
    ff_layer = pretrain.FeedForward(2 * config.lstm_size, num_labels)
    init_op = tf.global_variables_initializer()
    batch_input = tf.placeholder("int32", shape=[None, None], name="input")
    sequence_length = tf.placeholder("int32", shape=[None], name="seqlen")
    labels = tf.placeholder("int32", shape=[None, None, num_labels], name="labels")
    embeddings = emb_layer.lookup(batch_input)
    source_hidden_output = source_lstm.forward(embeddings, sequence_length)
    target_hidden_output = target_lstm.forward(embeddings, sequence_length)
    # sess.run(init_op)
    target_lstm._initialize(sess)
def test(sess, model, test_url, batch_size):
    test_set, test_count, _ = utils.data_set(test_url)
    test_batches = utils.create_batches(len(test_set), batch_size, shuffle=False)
    loss_sum = 0.0
    kld_sum = 0.0
    ppx_sum = 0.0
    word_count = 0
    doc_count = 0
    for idx_batch in test_batches:
        data_batch, count_batch, mask = utils.fetch_data(
            test_set, test_count, idx_batch, FLAGS.vocab_size)
        input_feed = {model.x.name: data_batch, model.mask.name: mask}
        loss, kld = sess.run([model.objective, model.kld], input_feed)
        loss_sum += np.sum(loss)
        kld_sum += np.sum(kld) / np.sum(mask)
        word_count += np.sum(count_batch)
        count_batch = np.add(count_batch, 1e-12)
        ppx_sum += np.sum(np.divide(loss, count_batch))
        doc_count += np.sum(mask)
    print_ppx = np.exp(loss_sum / word_count)
    print_ppx_perdoc = np.exp(ppx_sum / doc_count)
    print_kld = kld_sum / len(test_batches)
    print('| Epoch test: {:d} |'.format(1),
          '| Perplexity: {:.9f}'.format(print_ppx),
          '| Per doc ppx: {:.5f}'.format(print_ppx_perdoc),
          '| KLD: {:.5}'.format(print_kld))
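Most of the topic-model snippets in this listing call utils.create_batches(len(dataset), batch_size, shuffle=...) and then hand each returned item to utils.fetch_data as a list of document indices. The actual utils module is not shown here; the following is only a minimal sketch, assuming that inferred contract, of what such an index batcher could look like:

import random

def create_batches(n_docs, batch_size, shuffle=False):
    # Assumed reconstruction from usage; the real utils.create_batches may
    # pad or drop the final short batch differently.
    idx = list(range(n_docs))
    if shuffle:
        random.shuffle(idx)
    # Each batch is a plain list of document indices; the last one may be short.
    return [idx[i:i + batch_size] for i in range(0, n_docs, batch_size)]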
def prepare_batches(self, train_data, batch_size):
    train_batches = utils.create_batches(train_data, batch_size)
    batches = []
    for batch in train_batches:
        data_batch, prediction_batch = utils.unify_batch(batch)
        batches.append((data_batch, prediction_batch))
    return batches
def train(nvdm, train_url, optimizer, batch_size=64, training_epochs=1000):
    train_set, train_count = utils.data_set(train_url)
    for epoch in range(training_epochs):
        train_batches = utils.create_batches(len(train_set), batch_size)
        loss_sum = 0.0
        for idx_batch in train_batches:
            data_batch, count_batch, mask = utils.fetch_data(
                train_set, train_count, idx_batch, 2000)
            data_batch = torch.FloatTensor(data_batch)
            mask = torch.FloatTensor(mask)
            loss = nvdm(data_batch, mask)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            loss_sum += loss.item()
        print(loss_sum / len(train_batches))
def run(net, optimizer, data_list, corpus_word_count, is_train):
    perplexity = torch.tensor(0, dtype=torch.float)
    kld = torch.tensor(0, dtype=torch.float)
    doc_count = torch.tensor(0, dtype=torch.float)
    idx_batches = utils.create_batches(len(data_list), batch_size, shuffle=is_train)
    for idx_batch in idx_batches:
        # get batch data
        batch, batch_word_count, mask = utils.fetch_batch_data(
            data_list, corpus_word_count, idx_batch, vocab_num)
        batch = torch.tensor(batch, dtype=torch.float, device=device)
        batch_word_count = torch.tensor(batch_word_count, dtype=torch.float, device=device)
        mask = torch.tensor(mask, dtype=torch.float, device=device)
        # forward propagation
        shape, scale, lam, out = net(batch)
        # compute batch loss
        batch_likelihood, batch_kld = net.compute_batch_loss(batch, out, shape, scale)
        batch_loss = (batch_likelihood + batch_kld) * mask
        # compute cumulative loss
        perplexity += torch.sum(batch_loss / (batch_word_count + 1e-12)).detach()
        kld += (torch.sum(batch_kld) / torch.sum(mask)).detach()
        doc_count += torch.sum(mask).detach()
        # train or validate
        if is_train:
            optimizer.zero_grad()
            batch_loss.backward(mask)
            optimizer.step()
    perplexity = torch.exp(perplexity / doc_count)
    kld = kld / len(idx_batches)
    return perplexity, kld
def optimize(self, x_train, y_train, x_test, y_test, epochs=1, batch_size=100):
    from utils import create_batches
    self.x_train, self.y_train = x_train, y_train
    self.x_test, self.y_test = x_test, y_test
    for epoch in range(epochs):
        for X, y in create_batches(x_train, y_train, batch_size):
            loss = self.nn.eval(X, y, training_run=True)
            self.optimizer.optimization_step()
            # print(loss)
            for callback in self.batch_callbacks:
                callback.step()
        for callback in self.epoch_callbacks:
            callback.step()
    for callback in self.on_finish_callbacks:
        callback.step()
def run(config, model_name):
    config = load_yaml(config)
    if model_name not in config['model']:
        raise NotImplementedError("{} is not implemented.".format(model_name))
    preprocessing_params = config['preprocessing']
    training_params = config['training']
    model_params = config['model'][model_name]

    train_df = pd.read_csv(preprocessing_params['train_path'], sep='\t')
    test_df = pd.read_csv(preprocessing_params['test_path'], sep='\t')
    t_list = preprocessing_params['target_list']
    model_params['targets'] = len(t_list)

    train_df['tokens'] = train_df['Tweet'].map(lambda x: tokenize(x))
    test_df['tokens'] = test_df['Tweet'].map(lambda x: tokenize(x))
    train_df['lengths'] = train_df['tokens'].map(lambda x: len(x))
    test_df['lengths'] = test_df['tokens'].map(lambda x: len(x))

    word_freq_dict = create_freq_vocabulary(
        list(train_df['tokens']) + list(test_df['tokens']))
    tokens = get_top_freq_words(word_freq_dict, 1)

    train_df = train_df.sort_values(by="lengths")
    test_df = test_df.sort_values(by="lengths")

    embeddings = get_embeddings(path=preprocessing_params['embeddings_path'])
    w2i = create_final_dictionary(tokens, embeddings,
                                  unk_token=preprocessing_params['unk_token'],
                                  pad_token=preprocessing_params['pad_token'])
    emb_matrix = get_embeddings_matrix(w2i, embeddings,
                                       preprocessing_params['embedding_size'])
    model_params['embeddings'] = emb_matrix

    train_batches = create_batches(train_df, training_params['batch_size'], w2i=w2i,
                                   pad_token=preprocessing_params['pad_token'],
                                   unk_token=preprocessing_params['unk_token'],
                                   target_list=t_list)
    test_batches = create_batches(test_df, training_params['batch_size'], w2i=w2i,
                                  pad_token=preprocessing_params['pad_token'],
                                  unk_token=preprocessing_params['unk_token'],
                                  target_list=t_list)

    model = ModelFactory.get_model(model_name, model_params)
    optimizer = Adam(model.trainable_weights, training_params['lr'])
    criterion = BCEWithLogitsLoss()
    train(model, train_batches, test_batches, optimizer, criterion,
          epochs=training_params['epochs'],
          init_patience=training_params['patience'],
          cuda=False, target_list=t_list)
    model = load_model(model)
    full_classification_report(model, test_batches, t_list)
def train(sess, model, train_url, test_url, batch_size, training_epochs=1000, alternate_epochs=10): """train nvdm model.""" train_set, train_count = utils.data_set(train_url) test_set, test_count = utils.data_set(test_url) # hold-out development dataset dev_set = test_set[:50] dev_count = test_count[:50] dev_batches = utils.create_batches(len(dev_set), batch_size, shuffle=False) test_batches = utils.create_batches(len(test_set), batch_size, shuffle=False) for epoch in range(training_epochs): train_batches = utils.create_batches(len(train_set), batch_size, shuffle=True) #------------------------------- # train for switch in range(0, 2): if switch == 0: optim = model.optim_dec print_mode = 'updating decoder' else: optim = model.optim_enc print_mode = 'updating encoder' for i in range(alternate_epochs): loss_sum = 0.0 ppx_sum = 0.0 kld_sum = 0.0 word_count = 0 doc_count = 0 for idx_batch in train_batches: data_batch, count_batch, mask = utils.fetch_data( train_set, train_count, idx_batch, FLAGS.vocab_size) input_feed = { model.x.name: data_batch, model.mask.name: mask } _, (loss, kld) = sess.run( (optim, [model.objective, model.kld]), input_feed) loss_sum += np.sum(loss) kld_sum += np.sum(kld) / np.sum(mask) word_count += np.sum(count_batch) # to avoid nan error count_batch = np.add(count_batch, 1e-12) # per document loss ppx_sum += np.sum(np.divide(loss, count_batch)) doc_count += np.sum(mask) print_ppx = np.exp(loss_sum / word_count) print_ppx_perdoc = np.exp(ppx_sum / doc_count) print_kld = kld_sum / len(train_batches) print( '| Epoch train: {:d} |'.format(epoch + 1), print_mode, '{:d}'.format(i), '| Corpus ppx: {:.5f}'.format( print_ppx), # perplexity for all docs '| Per doc ppx: {:.5f}'.format( print_ppx_perdoc), # perplexity for per doc '| KLD: {:.5}'.format(print_kld)) #------------------------------- # dev loss_sum = 0.0 kld_sum = 0.0 ppx_sum = 0.0 word_count = 0 doc_count = 0 for idx_batch in dev_batches: data_batch, count_batch, mask = utils.fetch_data( dev_set, dev_count, idx_batch, FLAGS.vocab_size) input_feed = {model.x.name: data_batch, model.mask.name: mask} loss, kld = sess.run([model.objective, model.kld], input_feed) loss_sum += np.sum(loss) kld_sum += np.sum(kld) / np.sum(mask) word_count += np.sum(count_batch) count_batch = np.add(count_batch, 1e-12) ppx_sum += np.sum(np.divide(loss, count_batch)) doc_count += np.sum(mask) print_ppx = np.exp(loss_sum / word_count) print_ppx_perdoc = np.exp(ppx_sum / doc_count) print_kld = kld_sum / len(dev_batches) print('| Epoch dev: {:d} |'.format(epoch + 1), '| Perplexity: {:.9f}'.format(print_ppx), '| Per doc ppx: {:.5f}'.format(print_ppx_perdoc), '| KLD: {:.5}'.format(print_kld)) #------------------------------- # test if FLAGS.test: loss_sum = 0.0 kld_sum = 0.0 ppx_sum = 0.0 word_count = 0 doc_count = 0 for idx_batch in test_batches: data_batch, count_batch, mask = utils.fetch_data( test_set, test_count, idx_batch, FLAGS.vocab_size) input_feed = {model.x.name: data_batch, model.mask.name: mask} loss, kld = sess.run([model.objective, model.kld], input_feed) loss_sum += np.sum(loss) kld_sum += np.sum(kld) / np.sum(mask) word_count += np.sum(count_batch) count_batch = np.add(count_batch, 1e-12) ppx_sum += np.sum(np.divide(loss, count_batch)) doc_count += np.sum(mask) print_ppx = np.exp(loss_sum / word_count) print_ppx_perdoc = np.exp(ppx_sum / doc_count) print_kld = kld_sum / len(test_batches) print('| Epoch test: {:d} |'.format(epoch + 1), '| Perplexity: {:.9f}'.format(print_ppx), '| Per doc ppx: {:.5f}'.format(print_ppx_perdoc), '| 
KLD: {:.5}'.format(print_kld))
"-w", "--word2vec", default=True, help="Use word2vec embeddings" ) optparser.add_option( "-r", "--restore", default=True, help="Rebuild the model and restore weights from checkpoint" ) opts = optparser.parse_args()[0] sess = tf.Session() adv = AdversarialLearning(sess, opts) input_x, _ = loader.prepare_input(config.datadir + config.train) s_seqlen, s_input = utils.convert_to_id(input_x, adv.emb_layer.word_to_id) s_seqlen, s_input = utils.create_batches(s_input, s_seqlen) input_x, _ = loader.prepare_medpost_input() t_seqlen, t_input = utils.convert_to_id(input_x, adv.emb_layer.word_to_id) t_seqlen, t_input = utils.create_batches(t_input, t_seqlen) s_len = len(s_input) t_len = len(t_input) # Do not initialize Source and Target LSTM weights; The variables are from index 0 to 8. # TODO: Find better fix for initialization of variables init = tf.variables_initializer(tf.global_variables()[9:]) sess.run(init) gloss = [] dloss = [] plt.axis([0, 10000, 0, 4]) plt.ion()
def train(sess, model, train_url, test_url, dev_url, model_url, batch_size, saver, training_epochs=400, alternate_epochs=1): """train nvctm model.""" train_set, train_count = utils.data_set(train_url) dev_set, dev_count = utils.data_set(dev_url) test_set, test_count = utils.data_set(test_url) dev_batches = utils.create_batches(len(dev_set), batch_size, shuffle=False) test_batches = utils.create_batches(len(test_set), batch_size, shuffle=False) train_theta = [] train_beta = [] for epoch in range(training_epochs): train_batches = utils.create_batches(len(train_set), batch_size, shuffle=True) # ------------------------------- # train for switch in range(0, 2): if switch == 0: optim = model.optim_dec print_mode = 'updating decoder' else: optim = model.optim_enc print_mode = 'updating encoder' for i in range(alternate_epochs): loss_sum = 0.0 ppx_sum = 0.0 kld_sum = 0.0 word_count = 0 doc_count = 0 res_sum = 0 log_sum = 0 mean_sum = 0 var_sum = 0 m = None Um = None enc = None for idx_batch in train_batches: data_batch, count_batch, mask = utils.fetch_data( train_set, train_count, idx_batch, FLAGS.vocab_size) input_feed = { model.x.name: data_batch, model.mask.name: mask } _, (loss, kld, mean, Umean, enc, rec_loss, log_s, mean_s, vk_show, theta, beta, lp, v) = sess.run((optim, [ model.objective, model.kld, model.mean, model.U, model.vk, model.recons_loss, model.log_squre, model.mean_squre, model.vk_show, model.theta, model.beta, model.log_prob, model.variance ]), input_feed) m = mean Um = Umean # print('*********************vk show', vk_show) # print('Umean', Umean[0]) loss_sum += np.sum(loss) kld_sum += np.sum(kld) / np.sum(mask) word_count += np.sum(count_batch) res_sum += np.sum(rec_loss) log_sum += np.sum(log_s) mean_sum += np.sum(mean_s) var_sum += np.sum(v) / np.sum(mask) # to avoid nan error count_batch = np.add(count_batch, 1e-12) # per document loss ppx_sum += np.sum(np.divide(loss, count_batch)) doc_count += np.sum(mask) if epoch == training_epochs - 1 and switch == 1 and i == alternate_epochs - 1: train_theta.extend(theta) train_beta.extend(beta) print_ppx = np.exp(loss_sum / word_count) # print_ppx_perdoc = np.exp(ppx_sum / doc_count) print_kld = kld_sum / len(train_batches) print_res = res_sum / len(train_batches) print_log = log_sum / len(train_batches) print_mean = mean_sum / len(train_batches) print_var = var_sum / len(train_batches) print( '| Epoch train: {:d} |'.format(epoch + 1), print_mode, '{:d}'.format(i), '| Corpus ppx: {:.5f}'.format( print_ppx), # perplexity per word # '| Per doc ppx: {:.5f}'.format(print_ppx_perdoc), # perplexity for per doc '| KLD: {:.5}'.format(print_kld), '| stddev {:.5}'.format(print_var), '| res_loss: {:5}'.format(print_res), '| log_loss: {:5}'.format(print_log), '| mean_loss: {:5}'.format(print_mean)) with codecs.open('./nvctm_train_theta', 'wb') as fp: pickle.dump(np.array(train_theta), fp) fp.close() if (epoch + 1 ) % 50 == 0 and switch == 1 and i == alternate_epochs - 1: with codecs.open('./nvctm_train_beta', 'wb') as fp: pickle.dump(beta, fp) fp.close() npmi.print_coherence('nvctm', FLAGS.data_dir + '/train.feat', FLAGS.vocab_size) # dev loss_sum = 0.0 kld_sum = 0.0 ppx_sum = 0.0 var_sum = 0 word_count = 0 doc_count = 0 for idx_batch in dev_batches: data_batch, count_batch, mask = utils.fetch_data( dev_set, dev_count, idx_batch, FLAGS.vocab_size) input_feed = {model.x.name: data_batch, model.mask.name: mask} loss, kld, v = sess.run( [model.objective, model.kld, model.variance], input_feed) loss_sum += np.sum(loss) kld_sum += np.sum(kld) / 
np.sum(mask) var_sum += np.sum(v) / np.sum(mask) word_count += np.sum(count_batch) count_batch = np.add(count_batch, 1e-12) ppx_sum += np.sum(np.divide(loss, count_batch)) doc_count += np.sum(mask) print_ppx = np.exp(loss_sum / word_count) print_var = var_sum / len(train_batches) # print_ppx_perdoc = np.exp(ppx_sum / doc_count) print_kld = kld_sum / len(dev_batches) print('\n| Epoch dev: {:d}'.format(epoch + 1), '| Perplexity: {:.9f}'.format(print_ppx), '| stddev {:.5}'.format(print_var), '| KLD: {:.5}'.format(print_kld)) # test if FLAGS.test: loss_sum = 0.0 kld_sum = 0.0 ppx_sum = 0.0 var_sum = 0.0 word_count = 0 doc_count = 0 for idx_batch in test_batches: data_batch, count_batch, mask = utils.fetch_data( test_set, test_count, idx_batch, FLAGS.vocab_size) input_feed = {model.x.name: data_batch, model.mask.name: mask} loss, kld, v = sess.run( [model.objective, model.kld, model.variance], input_feed) loss_sum += np.sum(loss) kld_sum += np.sum(kld) / np.sum(mask) var_sum += np.sum(v) / np.sum(mask) word_count += np.sum(count_batch) count_batch = np.add(count_batch, 1e-12) ppx_sum += np.sum(np.divide(loss, count_batch)) doc_count += np.sum(mask) print_ppx = np.exp(loss_sum / word_count) print_var = var_sum / len(train_batches) # print_ppx_perdoc = np.exp(ppx_sum / doc_count) print_kld = kld_sum / len(test_batches) print('| Epoch test: {:d}'.format(epoch + 1), '| Perplexity: {:.9f}'.format(print_ppx), '| stddev {:.5}'.format(print_var), '| KLD: {:.5}\n'.format(print_kld)) npmi.print_coherence('nvctm', FLAGS.data_dir + '/train.feat', FLAGS.vocab_size) saver.save(sess, model_url)
def train(gpu, args):
    rank = args.nr * args.gpus + gpu
    dist.init_process_group(backend='nccl', init_method='env://',
                            world_size=args.world_size, rank=rank)
    torch.manual_seed(0)
    words = read_words(
        '/users/PAS1588/liuluyu0378/lab1/1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled',
        seq_len, kernel[0])
    word_counter = collections.Counter(words).most_common(vocab_size - 1)
    vocab = [w for w, _ in word_counter]
    w2i = dict((w, i) for i, w in enumerate(vocab, 1))
    w2i['<unk>'] = 0
    print('vocab_size', vocab_size)
    print('w2i size', len(w2i))
    data = [w2i[w] if w in w2i else 0 for w in words]
    data = create_batches(data, batch_size, seq_len)
    split_idx = int(len(data) * 0.8)
    training_data = data[:split_idx]
    test_data = data[split_idx:]
    print('train samples:', len(training_data))
    print('test samples:', len(test_data))

    model = GatedCNN(seq_len, vocab_size, embd_size, n_layers, kernel, out_chs,
                     res_block_count, vocab_size)
    torch.cuda.set_device(gpu)
    model.cuda(gpu)
    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda(gpu)
    optimizer = torch.optim.SGD(model.parameters(), 1e-4)
    # Wrap the model
    model = nn.parallel.DataParallel(model, device_ids=[gpu])
    print("model transfered")
    optimizer = torch.optim.Adadelta(model.parameters())
    loss_fn = nn.NLLLoss()

    # Data loading code
    train_sampler = torch.utils.data.distributed.DistributedSampler(
        training_data, num_replicas=args.world_size, rank=rank)
    train_loader = torch.utils.data.DataLoader(dataset=training_data,
                                               batch_size=batch_size,
                                               shuffle=False,
                                               num_workers=0,
                                               pin_memory=True,
                                               sampler=train_sampler)
    start = datetime.now()
    total_step = len(train_loader)
    print("loaded")
    for epoch in range(args.epochs):
        a = time.time()
        print('----epoch', epoch)
        # random.shuffle(data)
        # print(len(data))
        for batch_ct, (X, Y) in enumerate(train_loader):
            X = to_var(torch.LongTensor(X))  # (bs, seq_len)
            Y = to_var(torch.LongTensor(Y))  # (bs,)
            # print(X.size(), Y.size())
            # print(batch_ct, X.size(), Y.size())
            pred = model(X)  # (bs, ans_size)
            # _, pred_ids = torch.max(pred, 1)
            loss = loss_fn(pred, Y)
            if batch_ct % 100 == 0:
                print('loss: {:.4f}'.format(loss.data.item()))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        b = time.time()
        print('current performance at epoch', epoch, "time:", b - a)
    if gpu == 0:
        print("Training complete in: " + str(datetime.now() - start))
# load model
model = torch.load(fn_model, map_location={'cuda:1': 'cuda:{}'.format(gpu_device)})
# make the rnn parameters a continuous chunk, which will speed up forward pass
model.rnn.flatten_parameters()
criterion = torch.nn.CrossEntropyLoss()
loader = DataLoader(fn_vocab)
plt.figure()
for fn in fn_data:
    # prepare dataset
    print('Processing %s...' % fn)
    word_list = load_clm_words(fn)
    test_data = loader.tokenize(word_list)
    test_data_batches = utils.create_batches(test_data, batch_size=1, device='cuda')
    losses = evaluate(test_data_batches)
    ppl_counter = Counter()
    x_interval = numpy.array([i * 0.2 for i in range(100)])
    for loss in losses:
        idx = numpy.argmin(abs(x_interval - loss))
        ppl_counter.update([x_interval[idx]])
    keys = []
    vals = []
    for key, value in sorted(ppl_counter.items()):
        keys.append(key)
        vals.append(value)
def train(sess, model, train_url, test_url, dev_url, batch_size, training_epochs=1000, alternate_epochs=1): """train gsm model.""" train_set, train_count = utils.data_set(train_url) test_set, test_count = utils.data_set(test_url) dev_set, dev_count = utils.data_set(dev_url) dev_batches = utils.create_batches(len(dev_set), batch_size, shuffle=False) test_batches = utils.create_batches(len(test_set), batch_size, shuffle=False) kld_list = [] var_list = [] train_theta = [] train_beta = [] test_theta = [] test_beta = [] for epoch in range(training_epochs): train_batches = utils.create_batches(len(train_set), batch_size, shuffle=True) # ------------------------------- # train for switch in range(0, 2): if switch == 0: optimize = model.optimize_dec print_mode = 'updating decoder' elif switch == 1: optimize = model.optimize_enc print_mode = 'updating encoder' for i in range(alternate_epochs): loss_sum = 0.0 ppx_sum = 0.0 kld_sum = 0.0 word_count = 0 doc_count = 0 var_sum = 0 for idx_batch in train_batches: data_batch, count_batch, mask = utils.fetch_data( train_set, train_count, idx_batch, FLAGS.vocab_size) input_feed = {model.x.name: data_batch, model.mask.name: mask, model.is_training.name: True, model.gamma.name: epoch/training_epochs} _, (loss, kld, v, theta, beta) =\ sess.run((optimize, [model.reconstruction_loss, model.kld, model.variance, model.topic_dist, model.beta]), input_feed) loss_sum += np.sum(loss) kld_sum += np.sum(kld) / np.sum(mask) var_sum += np.sum(v) / np.sum(mask) # print([np.max(theta[i]) for i in range(batch_size)]) # print([np.argmax(theta[i]) for i in range(batch_size)]) word_count += np.sum(count_batch) # to avoid nan error count_batch = np.add(count_batch, 1e-12) # per document loss ppx_sum += np.sum(np.divide(loss, count_batch)) doc_count += np.sum(mask) if epoch == training_epochs - 1 and switch == 1 and i == alternate_epochs - 1: train_theta.extend(theta) train_beta.extend(beta) print_ppx = np.exp(loss_sum / word_count) print_ppx_perdoc = np.exp(ppx_sum / doc_count) print_kld = kld_sum / len(train_batches) print_var = var_sum / len(train_batches) kld_list.append(print_kld) var_list.append(print_var) print('| Epoch train: {:d}'.format(epoch + 1), print_mode, '{:d}'.format(i + 1), '| Corpus ppx: {:.5f}'.format(print_ppx), # perplexity for all docs '| Per doc ppx: {:.5f}'.format(print_ppx_perdoc), # perplexity for per doc '| KLD: {:.5}'.format(print_kld), '| stddev {:.5}'.format(print_var)) with codecs.open('./gsm_train_theta', 'wb') as fp: pickle.dump(np.array(train_theta), fp) fp.close() if (epoch + 1) % 50 == 0 and switch == 1 and i == alternate_epochs - 1: with codecs.open('./gsm_train_beta', 'wb') as fp: pickle.dump(beta, fp) fp.close() npmi.print_coherence('gsm', FLAGS.data_dir + '/train.feat', FLAGS.vocab_size) # ------------------------------- # dev loss_sum = 0.0 kld_sum = 0.0 ppx_sum = 0.0 word_count = 0 doc_count = 0 var_sum = 0 for idx_batch in dev_batches: data_batch, count_batch, mask = utils.fetch_data(dev_set, dev_count, idx_batch, FLAGS.vocab_size) input_feed = {model.x.name: data_batch, model.mask.name: mask, model.is_training.name: False, model.gamma.name: 0} loss, kld, v = sess.run([model.objective, model.kld, model.variance], input_feed) loss_sum += np.sum(loss) kld_sum += np.sum(kld) / np.sum(mask) word_count += np.sum(count_batch) count_batch = np.add(count_batch, 1e-12) ppx_sum += np.sum(np.divide(loss, count_batch)) var_sum += np.sum(v) / np.sum(mask) doc_count += np.sum(mask) print_ppx = np.exp(loss_sum / word_count) print_ppx_perdoc = 
np.exp(ppx_sum / doc_count) print_kld = kld_sum / len(dev_batches) print_var = var_sum / len(train_batches) print('\n| Epoch dev: {:d}'.format(epoch + 1), '| Perplexity: {:.9f}'.format(print_ppx), '| Per doc ppx: {:.5f}'.format(print_ppx_perdoc), '| KLD: {:.5}'.format(print_kld), '| stddev: {:.5}'.format(print_var)) # test if FLAGS.test: loss_sum = 0.0 kld_sum = 0.0 ppx_sum = 0.0 word_count = 0 doc_count = 0 for idx, idx_batch in enumerate(test_batches): data_batch, count_batch, mask = utils.fetch_data( test_set, test_count, idx_batch, FLAGS.vocab_size) input_feed = {model.x.name: data_batch, model.mask.name: mask, model.is_training.name: False, model.gamma.name: 0} loss, kld, theta, beta, v = sess.run([model.objective, model.kld, model.topic_dist, model.beta, model.variance], input_feed) loss_sum += np.sum(loss) kld_sum += np.sum(kld) / np.sum(mask) word_count += np.sum(count_batch) count_batch = np.add(count_batch, 1e-12) ppx_sum += np.sum(np.divide(loss, count_batch)) doc_count += np.sum(mask) test_theta.extend(theta) if idx == len(test_batches) - 1: test_beta.extend(beta) print_ppx = np.exp(loss_sum / word_count) print_ppx_perdoc = np.exp(ppx_sum / doc_count) print_kld = kld_sum / len(test_batches) print('| Epoch test: {:d}'.format(epoch + 1), '| Perplexity: {:.9f}'.format(print_ppx), '| Per doc ppx: {:.5f}'.format(print_ppx_perdoc), '| KLD: {:.5}'.format(print_kld), '| stddev: {:.5}\n'.format(print_var)) npmi.print_coherence('gsm', FLAGS.data_dir + '/train.feat', FLAGS.vocab_size) with codecs.open('./test_theta', 'wb') as fp: pickle.dump(test_theta, fp) fp.close() with codecs.open('./test_beta', 'wb') as fp: pickle.dump(test_beta, fp) fp.close() with codecs.open('./kld.txt', 'w', 'utf-8') as fp: for idx, kld in enumerate(kld_list): if idx < len(kld_list) - 1: fp.write(str(kld) + ', ') else: fp.write(str(kld)) fp.close() with codecs.open('./var.txt', 'w', 'utf-8') as fp: for idx, var in enumerate(var_list): if idx < len(var_list) - 1: fp.write(str(var) + ', ') else: fp.write(str(var)) fp.close()
def train(sess, model, train_url, test_url, batch_size, training_epochs=1000, alternate_epochs=10): """train gsm model.""" # train_set: 维度为1 x vocab_size,每一维是对应的词出现次数, train_count: 训练集的总词数 train_set, train_count = utils.data_set(train_url) test_set, test_count = utils.data_set(test_url) # hold-out development dataset, 选取前50篇文档 dev_set = test_set[:50] dev_count = test_count[:50] dev_batches = utils.create_batches(len(dev_set), batch_size, shuffle=False) test_batches = utils.create_batches(len(test_set), batch_size, shuffle=False) for epoch in range(training_epochs): # 创建batches,大小为batch_size train_batches = utils.create_batches(len(train_set), batch_size, shuffle=True) # ------------------------------- # train for switch in range(0, 2): if switch == 0: optimize = model.optimize_dec print_mode = 'updating decoder' elif switch == 1: optimize = model.optimize_enc print_mode = 'updating encoder' for i in range(alternate_epochs): loss_sum = 0.0 ppx_sum = 0.0 kld_sum = 0.0 word_count = 0 doc_count = 0 # 训练每个batch for idx_batch in train_batches: ''' data_batch: 当前batch的词频向量集合,batch_size*vocab_size count_batch: 当前batch中每篇文档的词数 train_set: 训练集 train_count: 训练集词数 idx_batch: 当前batch mask: 用于某个batch文档不足时做序列对齐 ''' data_batch, count_batch, mask = utils.fetch_data( train_set, train_count, idx_batch, FLAGS.vocab_size) # input: x = data_batch, mask = mask input_feed = { model.x.name: data_batch, model.mask.name: mask } # return: loss = objective, kld = kld, optimizer = optimize # 以上三者组成feed_dict, 将模型中的tensor映射到具体的值 _, (loss, kld) = sess.run( (optimize, [model.objective, model.kld]), input_feed) loss_sum += np.sum(loss) kld_sum += np.sum(kld) / np.sum(mask) # 总词数 word_count += np.sum(count_batch) # to avoid nan error, 避免0分母 count_batch = np.add(count_batch, 1e-12) # per document loss ppx_sum += np.sum(np.divide(loss, count_batch)) doc_count += np.sum(mask) print_ppx = np.exp(loss_sum / word_count) print_ppx_perdoc = np.exp(ppx_sum / doc_count) print_kld = kld_sum / len(train_batches) print( '| Epoch train: {:d} |'.format(epoch + 1), print_mode, '{:d}'.format(i + 1), '| Corpus ppx: {:.5f}'.format( print_ppx), # perplexity for all docs '| Per doc ppx: {:.5f}'.format( print_ppx_perdoc), # perplexity for per doc '| KLD: {:.5}'.format(print_kld)) # ------------------------------- # dev loss_sum = 0.0 kld_sum = 0.0 ppx_sum = 0.0 word_count = 0 doc_count = 0 for idx_batch in dev_batches: data_batch, count_batch, mask = utils.fetch_data( dev_set, dev_count, idx_batch, FLAGS.vocab_size) input_feed = {model.x.name: data_batch, model.mask.name: mask} loss, kld = sess.run([model.objective, model.kld], input_feed) loss_sum += np.sum(loss) kld_sum += np.sum(kld) / np.sum(mask) word_count += np.sum(count_batch) count_batch = np.add(count_batch, 1e-12) ppx_sum += np.sum(np.divide(loss, count_batch)) doc_count += np.sum(mask) print_ppx = np.exp(loss_sum / word_count) print_ppx_perdoc = np.exp(ppx_sum / doc_count) print_kld = kld_sum / len(dev_batches) print('| Epoch dev: {:d} |'.format(epoch + 1), '| Perplexity: {:.9f}'.format(print_ppx), '| Per doc ppx: {:.5f}'.format(print_ppx_perdoc), '| KLD: {:.5}'.format(print_kld)) # ------------------------------- # test if FLAGS.test: loss_sum = 0.0 kld_sum = 0.0 ppx_sum = 0.0 word_count = 0 doc_count = 0 for idx_batch in test_batches: data_batch, count_batch, mask = utils.fetch_data( test_set, test_count, idx_batch, FLAGS.vocab_size) input_feed = {model.x.name: data_batch, model.mask.name: mask} loss, kld = sess.run([model.objective, model.kld], input_feed) loss_sum += np.sum(loss) 
kld_sum += np.sum(kld) / np.sum(mask) word_count += np.sum(count_batch) count_batch = np.add(count_batch, 1e-12) ppx_sum += np.sum(np.divide(loss, count_batch)) doc_count += np.sum(mask) print_ppx = np.exp(loss_sum / word_count) print_ppx_perdoc = np.exp(ppx_sum / doc_count) print_kld = kld_sum / len(test_batches) print('| Epoch test: {:d} |'.format(epoch + 1), '| Perplexity: {:.9f}'.format(print_ppx), '| Per doc ppx: {:.5f}'.format(print_ppx_perdoc), '| KLD: {:.5}'.format(print_kld))
def train( train_url, test_url, model_url, vocab_url, non_linearity, embedding_url, training_epochs, alternate_epochs, vocab_size, embedding_size, n_hidden, n_topic, n_sample, learning_rate, batch_size, is_training, mix_num, ): """train crntm model.""" train_set, train_count = utils.data_set(train_url) test_set, test_count = utils.data_set(test_url) vocab = utils.get_vocab(vocab_url) embedding_table = utils.load_embedding( embedding_url, embedding_size, vocab, FLAGS.data_dir + '/vocab_embedding-{}.pkl'.format(embedding_size)) # hold-out development dataset dev_count = test_count[:50] dev_onehot_set = test_set[:50] dev_batches = utils.create_batches(len(dev_onehot_set), batch_size, shuffle=False) test_batches = utils.create_batches(len(test_set), batch_size, shuffle=False) # create model crntm = CRNTM(vocab_size=vocab_size, embedding_size=embedding_size, n_hidden=n_hidden, n_topic=n_topic, n_sample=n_sample, learning_rate=learning_rate, batch_size=batch_size, non_linearity=non_linearity, embedding_table=embedding_table, is_training=is_training, mix_num=mix_num) crntm.construct_model() sess = tf.Session() init = tf.initialize_all_variables() sess.run(init) model = crntm saver = tf.train.Saver() # # if RESTORE: # return embedding_table[1:] for epoch in range(training_epochs): train_batches = utils.create_batches(len(train_set), batch_size, shuffle=True) #------------------------------- # train for switch in range(0, 2): if switch == 0: optim = model.optim_dec print_mode = 'updating decoder' else: optim = model.optim_enc print_mode = 'updating encoder' for i in range(alternate_epochs): loss_sum = 0.0 ppx_sum = 0.0 kld_sum = 0.0 word_count = 0 doc_count = 0 res_sum = 0 log_sum = 0 r_sum = 0 log_s = None r_loss = None g_loss = None for bn, idx_batch in enumerate(train_batches): data_onehot_batch, count_batch, mask = utils.fetch_data( train_set, train_count, idx_batch, FLAGS.vocab_size) input_feed = { model.x_onehot.name: data_onehot_batch, model.mask.name: mask } _, (loss, kld, rec_loss, log_s, r_loss, g_loss) = sess.run( (optim, [ model.objective, model.kld, model.recons_loss, model.logits, model.doc_vec, model.topic_word_prob ]), input_feed) # if switch==0: # # # print(bn, len(train_batches), mask.sum(), r_loss.shape) # print('ptheta', log_s) # print('doc_Vec', r_loss) # print('topic_prob', g_loss) res_sum += np.sum(rec_loss) log_sum += np.sum(log_s) loss_sum += np.sum(loss) r_sum += np.sum(r_loss) kld_sum += np.sum(kld) / np.sum(mask) word_count += np.sum(count_batch) # to avoid nan error count_batch = np.add(count_batch, 1e-12) # per document loss ppx_sum += np.sum(np.divide(loss, count_batch)) # print(np.sum(np.divide(loss, count_batch))) doc_count += np.sum(mask) # if doc_count>11264: # print('debug:: ', doc_count, rec_loss, kld, loss[-1], count_batch[-1]) print_ppx = np.exp(loss_sum / word_count) print_ppx_perdoc = np.exp(ppx_sum / doc_count) print_kld = kld_sum / len(train_batches) print_res = res_sum / len(train_batches) print_log = log_sum / len(train_batches) print_mean = r_sum / len(train_batches) message = '| Epoch train: {:d} | {} {:d} | Corpus ppx: {:.5f}::{} | Per doc ppx: {:.5f}::{} | KLD: {:.5} | res_loss: {:5} | log_loss: {:5} | r_loss: {:5}'.format( epoch + 1, print_mode, i, print_ppx, word_count, print_ppx_perdoc, doc_count, print_kld, print_res, print_log, print_mean, ) print(message) write_result(message) TopicWords(sess, vocab_url, embedding_table[1:]) #------------------------------- # dev loss_sum = 0.0 ppx_sum = 0.0 kld_sum = 0.0 word_count = 0 doc_count = 0 res_sum = 
0 log_sum = 0 mean_sum = 0 r_sum = 0 for idx_batch in dev_batches: data_onehot_batch, count_batch, mask = utils.fetch_data( dev_onehot_set, dev_count, idx_batch, FLAGS.vocab_size) input_feed = { model.x_onehot.name: data_onehot_batch, model.mask.name: mask } loss, kld, rec_loss, log_s, r_loss = sess.run([ model.objective, model.kld, model.recons_loss, model.embedding_loss, model.res_loss ], input_feed) res_sum += np.sum(rec_loss) log_sum += np.sum(log_s) loss_sum += np.sum(loss) r_sum += np.sum(r_loss) kld_sum += np.sum(kld) / np.sum(mask) word_count += np.sum(count_batch) # to avoid nan error count_batch = np.add(count_batch, 1e-12) # per document loss ppx_sum += np.sum(np.divide(loss, count_batch)) # print(np.sum(np.divide(loss, count_batch))) doc_count += np.sum(mask) # if doc_count>11264: # print('debug:: ', doc_count, rec_loss, kld, loss[-1], count_batch[-1]) print_ppx = np.exp(loss_sum / word_count) print_ppx_perdoc = np.exp(ppx_sum / doc_count) # print_ppx_perdoc = ppx_sum / doc_count # print(loss_sum, word_count) print_kld = kld_sum / len(train_batches) print_res = res_sum / len(train_batches) print_log = log_sum / len(train_batches) print_mean = r_sum / len(train_batches) message = '| Epoch dev: {:d} | Corpus ppx: {:.5f}::{} | Per doc ppx: {:.5f}::{} | KLD: {:.5} | res_loss: {:5} | log_loss: {:5} | r_loss: {:5}'.format( epoch + 1, print_ppx, word_count, print_ppx_perdoc, doc_count, print_kld, print_res, print_log, print_mean, ) print(message) write_result(message) # test if FLAGS.test: loss_sum = 0.0 kld_sum = 0.0 ppx_sum = 0.0 word_count = 0 doc_count = 0 for idx_batch in test_batches: data_onehot_batch, count_batch, mask = utils.fetch_data( test_set, test_count, idx_batch, FLAGS.vocab_size) input_feed = { model.x_onehot.name: data_onehot_batch, model.mask.name: mask } loss, kld = sess.run([model.objective, model.kld], input_feed) loss_sum += np.sum(loss) kld_sum += np.sum(kld) / np.sum(mask) word_count += np.sum(count_batch) count_batch = np.add(count_batch, 1e-12) ppx_sum += np.sum(np.divide(loss, count_batch)) doc_count += np.sum(mask) print_ppx = np.exp(loss_sum / word_count) print_ppx_perdoc = np.exp(ppx_sum / doc_count) print_kld = kld_sum / len(test_batches) message = '| Epoch test: {:d} | Corpus ppx: {:.5f} | Per doc ppx: {:.5f} | KLD: {:.5} '.format( epoch + 1, print_ppx, print_ppx_perdoc, print_kld, ) print(message) write_result(message) saver.save(sess, model_url)
def train(sess, model, train_url, test_url, batch_size, FLAGS, train_csv_filename, dev_csv_filename, test_csv_filename, training_epochs=1000, alternate_epochs=10, is_restore=False): """train nvdm model.""" train_set, train_count = utils.data_set(train_url) test_set, test_count = utils.data_set(test_url) # hold-out development dataset dev_set = test_set[:50] dev_count = test_count[:50] dev_batches = utils.create_batches(len(dev_set), batch_size, shuffle=False) test_batches = utils.create_batches(len(test_set), batch_size, shuffle=False) #save model saver = tf.train.Saver() if is_restore: saver.restore(sess, "./checkpoints/model.ckpt") for epoch in range(training_epochs): train_batches = utils.create_batches(len(train_set), batch_size, shuffle=True) #------------------------------- # train for switch in xrange(0, 2): if switch == 0: optim = model.optim_dec print_mode = 'updating decoder' else: optim = model.optim_enc print_mode = 'updating encoder' for i in xrange(alternate_epochs): loss_sum = 0.0 ppx_sum = 0.0 kld_sum = 0.0 word_count = 0 doc_count = 0 for idx_batch in train_batches: data_batch, count_batch, mask = utils.fetch_data( train_set, train_count, idx_batch, FLAGS.vocab_size) input_feed = { model.x.name: data_batch, model.mask.name: mask } _, (loss, kld) = sess.run( (optim, [model.objective, model.kld]), input_feed) loss_sum += np.sum(loss) kld_sum += np.sum(kld) / np.sum(mask) word_count += np.sum(count_batch) # to avoid nan error count_batch = np.add(count_batch, 1e-12) # per document loss ppx_sum += np.sum(np.divide(loss, count_batch)) doc_count += np.sum(mask) print_ppx = np.exp(loss_sum / word_count) print_ppx_perdoc = np.exp(ppx_sum / doc_count) print_kld = kld_sum / len(train_batches) with open(train_csv_filename, 'a') as train_csv: train_writer = csv.writer(train_csv, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL) train_writer.writerow([ epoch + 1, print_mode, i, print_ppx, print_ppx_perdoc, print_kld ]) print( '| Epoch train: {:d} |'.format(epoch + 1), print_mode, '{:d}'.format(i), '| Corpus ppx: {:.5f}'.format( print_ppx), # perplexity for all docs '| Per doc ppx: {:.5f}'.format( print_ppx_perdoc), # perplexity for per doc '| KLD: {:.5}'.format(print_kld)) #------------------------------- # dev loss_sum = 0.0 kld_sum = 0.0 ppx_sum = 0.0 word_count = 0 doc_count = 0 for idx_batch in dev_batches: data_batch, count_batch, mask = utils.fetch_data( dev_set, dev_count, idx_batch, FLAGS.vocab_size) input_feed = {model.x.name: data_batch, model.mask.name: mask} loss, kld = sess.run([model.objective, model.kld], input_feed) loss_sum += np.sum(loss) kld_sum += np.sum(kld) / np.sum(mask) word_count += np.sum(count_batch) count_batch = np.add(count_batch, 1e-12) ppx_sum += np.sum(np.divide(loss, count_batch)) doc_count += np.sum(mask) print_ppx = np.exp(loss_sum / word_count) print_ppx_perdoc = np.exp(ppx_sum / doc_count) print_kld = kld_sum / len(dev_batches) with open(dev_csv_filename, 'a') as dev_csv: dev_writer = csv.writer(dev_csv, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL) dev_writer.writerow( [epoch + 1, print_ppx, print_ppx_perdoc, print_kld]) print('| Epoch dev: {:d} |'.format(epoch + 1), '| Perplexity: {:.9f}'.format(print_ppx), '| Per doc ppx: {:.5f}'.format(print_ppx_perdoc), '| KLD: {:.5}'.format(print_kld)) #------------------------------- # test if FLAGS.test: loss_sum = 0.0 kld_sum = 0.0 ppx_sum = 0.0 word_count = 0 doc_count = 0 for idx_batch in test_batches: data_batch, count_batch, mask = utils.fetch_data( test_set, test_count, idx_batch, 
FLAGS.vocab_size) input_feed = {model.x.name: data_batch, model.mask.name: mask} loss, kld = sess.run([model.objective, model.kld], input_feed) loss_sum += np.sum(loss) kld_sum += np.sum(kld) / np.sum(mask) word_count += np.sum(count_batch) count_batch = np.add(count_batch, 1e-12) ppx_sum += np.sum(np.divide(loss, count_batch)) doc_count += np.sum(mask) print_ppx = np.exp(loss_sum / word_count) print_ppx_perdoc = np.exp(ppx_sum / doc_count) print_kld = kld_sum / len(test_batches) with open(test_csv_filename, 'a') as test_csv: test_writer = csv.writer(test_csv, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL) test_writer.writerow( [epoch + 1, print_ppx, print_ppx_perdoc, print_kld]) print('| Epoch test: {:d} |'.format(epoch + 1), '| Perplexity: {:.9f}'.format(print_ppx), '| Per doc ppx: {:.5f}'.format(print_ppx_perdoc), '| KLD: {:.5}'.format(print_kld))
# world_size (int, optional) - Number of processes participating in the job
# init_method (str, optional) - URL specifying how to initialize the process group.
#     Default is "env://" if no init_method or store is specified. Mutually exclusive with store.
# setup()
words = read_words(
    '/users/PAS1588/liuluyu0378/lab1/1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled',
    seq_len, kernel[0])
word_counter = collections.Counter(words).most_common(vocab_size - 1)
vocab = [w for w, _ in word_counter]
w2i = dict((w, i) for i, w in enumerate(vocab, 1))
w2i['<unk>'] = 0
print('vocab_size', vocab_size)
print('w2i size', len(w2i))
data = [w2i[w] if w in w2i else 0 for w in words]
data = create_batches(data, batch_size, seq_len)
split_idx = int(len(data) * 0.8)
training_data = data[:split_idx]
test_data = data[split_idx:]
print('train samples:', len(training_data))
print('test samples:', len(test_data))
model = GatedCNN(seq_len, vocab_size, embd_size, n_layers, kernel, out_chs,
                 res_block_count, vocab_size)
cuda = None
if torch.cuda.is_available():
    print("cuda")
    model.cuda()
    cuda = True
else:
    cuda = False
def test_create_batches():
    batches = u.create_batches(range(10), 2)
    for i, batch in enumerate(batches):
        assert batch == range(i * 2, i * 2 + 2)
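The assertion above holds only if the helper returns consecutive, non-overlapping slices of its input (slicing a range yields a range in Python 3, so the equality with range(i * 2, i * 2 + 2) can succeed). As an illustration of the contract the test assumes, not the project's actual implementation, a slicing variant could be as small as:

def create_batches(data, batch_size):
    # Consecutive, non-overlapping slices; the final slice may be shorter.
    return [data[i:i + batch_size] for i in range(0, len(data), batch_size)]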
def train_x( self, dev_set_with_lab, dev_set_without_lab, dev_set_y, train_set_with_lab, train_set_without_lab, train_set_y, test_set, test_set_y, to_label, model_name, # 10 warm_up_period=100, n_dropout_rounds=100, max_learning_iterations=100, no_improvement_iterations=15, semi_supervised=True, debug=True, it=1): sess = tf.Session() sess.run(tf.global_variables_initializer()) self.it = it is_training = False dev_batches_with_lab, dev_batches_without_lab = utils.create_batches_new( len(dev_set_y), len(dev_set_without_lab), self.batch_size, shuffle=False) test_batches = utils.create_batches(len(test_set_y), self.batch_size, shuffle=False) # train_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/train', # sess.graph) warm_up = 0 min_alpha = 0.001 # best_print_ana_ppx = 1e10 no_improvement_iters = 0 stopped = False epoch = -1 while not stopped: epoch += 1 train_batches_with_lab, train_batches_without_lab = utils.create_batches_new( len(train_set_with_lab), len(train_set_without_lab), self.batch_size, shuffle=True) if warm_up < 1.: warm_up += 1. / warm_up_period else: warm_up = 1. self.run_model(train_batches_with_lab, train_set_with_lab, train_set_y, train_batches_without_lab, train_set_without_lab, debug, semi_supervised, epoch, warm_up, min_alpha, sess, optim=self.optim, keep_prop=0.75, print_statement="training", training=True) print_ana_loss, _, _ = self.run_model(dev_batches_with_lab, dev_set_with_lab, dev_set_y, dev_batches_without_lab, dev_set_without_lab, debug, semi_supervised, epoch, warm_up, min_alpha, sess, optim=None, keep_prop=1.0, print_statement="dev", training=is_training) if debug: _, f1_measure_test, _ = self.run_model(test_batches, test_set, test_set_y, test_batches, test_set, debug, semi_supervised, epoch, warm_up, min_alpha, sess, optim=None, keep_prop=1.0, print_statement="test", training=is_training) print("TEST F1:", f1_measure_test) if print_ana_loss < best_print_ana_ppx: no_improvement_iters = 0 best_print_ana_ppx = print_ana_loss #tf.train.Saver().save(sess, model_name + '/improved_model') else: no_improvement_iters += 1 print("No improvement: ", no_improvement_iters, "epoch:", epoch) if no_improvement_iters >= no_improvement_iterations: break # print("load best dev f1 model...") #tf.train.Saver().restore(sess, model_name + '/improved_model') _, f1_measure, prop_clss = self.run_model(test_batches, test_set, test_set_y, test_batches, test_set, debug, semi_supervised, epoch, warm_up, min_alpha, sess, optim=None, keep_prop=1.0, print_statement="test", training=is_training, model_name=model_name) data_batch_labeled, mask = utils.fetch_data_without_idx_new( to_label, self.vocab_size) data_batch_y, data_batch_y_neg, data_batch_y_pos = utils.fetch_data_y_dummy_new( to_label, self.n_class) input_feed = { self.x_labeled: data_batch_labeled, self.x_unlabeled: data_batch_labeled, self.y_labeled: data_batch_y, self.y_neg: data_batch_y_neg, self.y_pos: data_batch_y_pos, self.mask.name: mask, self.keep_prob: 0.75, self.warm_up: warm_up, self.min_alpha: min_alpha, self.training.name: is_training } prediction = [ sess.run(([self.out_y]), input_feed) for _ in range(n_dropout_rounds) ] return f1_measure[1], prediction, prop_clss
def evaluate(model, training_data, training_count, session, step, train_loss=None, epoch=None, summaries=None, writer=None, saver=None): #Get theta for the H1. data_url = os.path.join(FLAGS.data_dir, 'valid_h1.feat' if step != 'test' else 'test_h1.feat') dataset, dataset_count = utils.data_set(data_url) data_batches = utils.create_batches(len(dataset), FLAGS.batch_size, shuffle=False) theta = [] for idx_batch in data_batches: data_batch, count_batch, mask = utils.fetch_data(dataset, dataset_count, idx_batch, FLAGS.vocab_size) input_feed = {model.x.name: data_batch, model.mask.name: mask} logit_theta = session.run(model.doc_vec, input_feed) theta.append(softmax(logit_theta, axis=1)) theta = np.concatenate(theta, axis=0) weights = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='decoder/projection/Matrix:0')[0].eval(session) bias = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='decoder/projection/Bias:0')[0].eval(session) beta = softmax(weights + bias, axis=1) #H2 to calculate perplexity. data_url = os.path.join(FLAGS.data_dir, 'valid_h2.feat' if step != 'test' else 'test_h2.feat') dataset, dataset_count = utils.data_set(data_url) data_batches = utils.create_batches(len(dataset), FLAGS.batch_size, shuffle=False) test_data = [utils.fetch_data(dataset, dataset_count, idx_batch, FLAGS.vocab_size)[0] for idx_batch in data_batches] test_data = np.concatenate(test_data, axis=0) perplexity = get_perplexity(test_data, theta, beta) coherence = get_topic_coherence(beta, training_data, 'nvdm') if step == 'test' else np.nan diversity = get_topic_diversity(beta, 'nvdm') if step == 'test' else np.nan if step == 'val': #tloss = tf.get_default_graph().get_tensor_by_name('tloss:0') #vppl = tf.get_default_graph().get_tensor_by_name('vppl:0') #weight_summaries = session.run(summaries, feed_dict={tloss: train_loss, vppl: perplexity}) #weight_summaries = summaries.eval(session=session) #writer.add_summary(weight_summaries, epoch) save_path = saver.save(session, os.path.join(ckpt, 'model.ckpt')) print("Model saved in path: %s" % ckpt) print('| Epoch dev: {:d} |'.format(epoch+1)) else: ## get most used topics cnt = 0 thetaWeightedAvg = np.zeros((1, FLAGS.n_topic)) data_batches = utils.create_batches(len(training_data), FLAGS.batch_size, shuffle=False) for idx_batch in data_batches: batch, count_batch, mask = utils.fetch_data(training_data, training_count, idx_batch, FLAGS.vocab_size) sums = batch.sum(axis=1) cnt += sums.sum(axis=0) input_feed = {model.x.name: batch, model.mask.name: mask} logit_theta = session.run(model.doc_vec, input_feed) theta = softmax(logit_theta, axis=1) weighed_theta = (theta.T * sums).T thetaWeightedAvg += weighed_theta.sum(axis=0) thetaWeightedAvg = thetaWeightedAvg.squeeze() / cnt print('\nThe 10 most used topics are {}'.format(thetaWeightedAvg.argsort()[::-1][:10])) with open(FLAGS.data_dir + '/vocab.new', 'rb') as f: vocab = pkl.load(f) topic_indices = list(np.random.choice(FLAGS.n_topic, 10)) # 10 random topics print('\n') with open(ckpt + '/topics.txt', 'w') as f: for k in range(FLAGS.n_topic): gamma = beta[k] top_words = list(gamma.argsort()[-FLAGS.n_words+1:][::-1]) topic_words = [vocab[a] for a in top_words] f.write(str(k) + ' ' + str(topic_words) + '\n') print('Topic {}: {}'.format(k, topic_words)) with open(ckpt + '/' + step + '_scores.csv', 'a') as handle: handle.write(str(perplexity) + ',' + str(coherence) + ',' + str(diversity) + '\n')
#%%
model = models.MLPmod(7, dimensions, nn.ReLU)
out = model(X)
out.shape

#%%
model = models.MLPmod(7, dimensions, nn.ReLU)
mae_trains = []
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.MSELoss()
# x_test, target_test = utils.create_batches(X, Y, 64, 1)
for epoch in range(1500):
    x, y = utils.create_batches(X, Y, 256, 1)
    x = torch.tensor(x).type(dtype=torch.float)
    y = torch.tensor(y).type(dtype=torch.float)
    model.train()
    output = model(x)
    loss = criterion(output, y)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    model.eval()
    with torch.no_grad():
def train(sess, model, train_url, batch_size, training_epochs=1000, alternate_epochs=10):
    train_set, train_count = utils.data_set(train_url)
    summaries = None  # get_summaries(sess)
    writer = None  # tf.summary.FileWriter(ckpt + '/logs/', sess.graph)
    saver = tf.train.Saver()
    sess.graph.finalize()
    total_mem = 0
    mem = 0
    for epoch in range(training_epochs):
        train_batches = utils.create_batches(len(train_set), batch_size, shuffle=True)
        for switch in range(0, 2):
            if switch == 0:
                optim = model.optim_dec
                print_mode = 'updating decoder'
            else:
                optim = model.optim_enc
                print_mode = 'updating encoder'
            for i in range(alternate_epochs):
                loss_sum = 0.0
                ppx_sum = 0.0
                kld_sum = 0.0
                word_count = 0
                doc_count = 0
                for idx_batch in train_batches:
                    data_batch, count_batch, mask = utils.fetch_data(
                        train_set, train_count, idx_batch, FLAGS.vocab_size)
                    input_feed = {model.x.name: data_batch, model.mask.name: mask}
                    _, (loss, kld) = sess.run((optim, [model.objective, model.kld]),
                                              input_feed)
                    # loss, kld = tf.cast(loss, tf.float64), tf.cast(kld, tf.float64)
                    loss_sum += np.sum(loss)
                    kld_sum += np.sum(kld) / np.sum(mask)
                    word_count += np.sum(count_batch)
                    # to avoid nan error
                    count_batch = np.add(count_batch, 1e-12)
                    # per document loss
                    ppx_sum += np.sum(np.divide(loss, count_batch))
                    doc_count += np.sum(mask)
                print_ppx = np.exp(loss_sum / word_count)
                print_ppx_perdoc = np.exp(ppx_sum / doc_count)
                print_kld = kld_sum / len(train_batches)
                print('| Epoch train: {:d} |'.format(epoch + 1), print_mode, '{:d}'.format(i),
                      '| Corpus ppx: {:.5f}'.format(print_ppx),  # perplexity for all docs
                      '| Per doc ppx: {:.5f}'.format(print_ppx_perdoc),  # perplexity for per doc
                      '| KLD: {:.5}'.format(print_kld))
        evaluate(model, train_set, train_count, sess, 'val', (loss_sum + kld_sum),
                 epoch, summaries, writer, saver)
        current_mem = process.memory_info().rss / (1024 ** 2)
        total_mem += (current_mem - mem)
        print("Memory increase: {}, Cumulative memory: {}, and current {} in MB".format(
            current_mem - mem, total_mem, current_mem))
        mem = current_mem
        gc.collect()
def train(sess, model, train_url, test_url, batch_size, vocab_size, training_epochs=200, alternate_epochs=1,#10 lexicon=[], result_file='test.txt', B=1, warm_up_period=100): """train nvdm model.""" train_set, train_count = utils.data_set(train_url) test_set, test_count = utils.data_set(test_url) # hold-out development dataset train_size=len(train_set) validation_size=int(train_size*0.1) dev_set = train_set[:validation_size] dev_count = train_count[:validation_size] train_set = train_set[validation_size:] train_count = train_count[validation_size:] print('sizes',train_size,validation_size,len(dev_set),len(train_set)) optimize_jointly = True dev_batches = utils.create_batches(len(dev_set), batch_size, shuffle=False) test_batches = utils.create_batches(len(test_set), batch_size, shuffle=False) warm_up = 0 start_min_alpha = 0.00001 min_alpha = start_min_alpha warm_up_alpha=False start_B=4 curr_B=B #for early stopping best_print_ana_ppx=1e10 early_stopping_iters=30 no_improvement_iters=0 stopped=False epoch=-1 #for epoch in range(training_epochs): while not stopped: epoch+=1 train_batches = utils.create_batches(len(train_set), batch_size, shuffle=True) if warm_up<1.: warm_up += 1./warm_up_period else: warm_up=1. # train #for switch in range(0, 2): if optimize_jointly: optim = model.optim_all print_mode = 'updating encoder and decoder' elif switch == 0: optim = model.optim_dec print_mode = 'updating decoder' else: optim = model.optim_enc print_mode = 'updating encoder' for i in range(alternate_epochs): loss_sum = 0.0 ana_loss_sum = 0.0 ppx_sum = 0.0 kld_sum = 0.0 ana_kld_sum = 0.0 word_count = 0 doc_count = 0 recon_sum=0.0 for idx_batch in train_batches: data_batch, count_batch, mask = utils.fetch_data( train_set, train_count, idx_batch, vocab_size) input_feed = {model.x.name: data_batch, model.mask.name: mask,model.keep_prob.name: 0.75,model.warm_up.name: warm_up,model.min_alpha.name:min_alpha,model.B.name: curr_B} _, (loss,recon, kld,ana_loss,ana_kld) = sess.run((optim, [model.true_objective, model.recons_loss, model.kld,model.analytical_objective,model.analytical_kld]), input_feed) loss_sum += np.sum(loss) ana_loss_sum += np.sum(ana_loss) kld_sum += np.sum(kld) / np.sum(mask) ana_kld_sum += np.sum(ana_kld) / np.sum(mask) word_count += np.sum(count_batch) # to avoid nan error count_batch = np.add(count_batch, 1e-12) # per document loss ppx_sum += np.sum(np.divide(loss, count_batch)) doc_count += np.sum(mask) recon_sum+=np.sum(recon) print_loss = recon_sum/len(train_batches) dec_vars = utils.variable_parser(tf.trainable_variables(), 'decoder') phi = dec_vars[0] phi = sess.run(phi) utils.print_top_words(phi, lexicon,result_file=None) print_ppx = np.exp(loss_sum / word_count) print_ana_ppx = np.exp(ana_loss_sum / word_count) print_ppx_perdoc = np.exp(ppx_sum / doc_count) print_kld = kld_sum/len(train_batches) print_ana_kld = ana_kld_sum/len(train_batches) print('| Epoch train: {:d} |'.format(epoch+1), print_mode, '{:d}'.format(i), '| Corpus ppx: {:.5f}'.format(print_ppx), # perplexity for all docs '| Per doc ppx: {:.5f}'.format(print_ppx_perdoc), # perplexity for per doc '| KLD: {:.5}'.format(print_kld), '| Loss: {:.5}'.format(print_loss), '| ppx anal.: {:.5f}'.format(print_ana_ppx), '|KLD anal.: {:.5f}'.format(print_ana_kld)) if warm_up_alpha: if min_alpha>0.0001: min_alpha-=(start_min_alpha-0.0001)/training_epochs #------------------------------- # dev loss_sum = 0.0 kld_sum = 0.0 ppx_sum = 0.0 word_count = 0 doc_count = 0 recon_sum=0.0 print_ana_ppx = 0.0 ana_loss_sum = 0.0 for idx_batch in 
dev_batches: data_batch, count_batch, mask = utils.fetch_data( dev_set, dev_count, idx_batch, vocab_size) input_feed = {model.x.name: data_batch, model.mask.name: mask,model.keep_prob.name: 1.0,model.warm_up.name: 1.0,model.min_alpha.name:min_alpha,model.B.name: B}#,model.B.name: B loss,recon, kld,ana_loss = sess.run([model.objective, model.recons_loss, model.analytical_kld,model.analytical_objective], input_feed) loss_sum += np.sum(loss) ana_loss_sum += np.sum(ana_loss) kld_sum += np.sum(kld) / np.sum(mask) word_count += np.sum(count_batch) count_batch = np.add(count_batch, 1e-12) ppx_sum += np.sum(np.divide(loss, count_batch)) doc_count += np.sum(mask) recon_sum+=np.sum(recon) print_ana_ppx = np.exp(ana_loss_sum / word_count) print_ppx = np.exp(loss_sum / word_count) print_ppx_perdoc = np.exp(ppx_sum / doc_count) print_kld = kld_sum/len(dev_batches) print_loss = recon_sum/len(dev_batches) if print_ana_ppx<best_print_ana_ppx: no_improvement_iters=0 best_print_ana_ppx=print_ana_ppx #check on validation set, if ppx better-> save improved model tf.train.Saver().save(sess, 'models/improved_model_bernoulli') else: no_improvement_iters+=1 print('no_improvement_iters',no_improvement_iters,'best ppx',best_print_ana_ppx) if no_improvement_iters>=early_stopping_iters: #if model has not improved for 30 iterations, stop training ###########STOP TRAINING############ stopped=True print('stop training after',epoch,'iterations,no_improvement_iters',no_improvement_iters) ###########LOAD BEST MODEL########## print('load stored model') tf.train.Saver().restore(sess,'models/improved_model_bernoulli') print('| Epoch dev: {:d} |'.format(epoch+1), '| Perplexity: {:.9f}'.format(print_ppx), '| Per doc ppx: {:.5f}'.format(print_ppx_perdoc), '| KLD: {:.5}'.format(print_kld) , '| Loss: {:.5}'.format(print_loss)) #------------------------------- # test #if epoch%10==0 or epoch==training_epochs-1: if FLAGS.test: #if epoch==training_epochs-1: if stopped: #only do it once in the end coherence=utils.topic_coherence(test_set,phi, lexicon) print('topic coherence',str(coherence)) loss_sum = 0.0 kld_sum = 0.0 ppx_sum = 0.0 word_count = 0 doc_count = 0 recon_sum = 0.0 ana_loss_sum = 0.0 ana_kld_sum = 0.0 for idx_batch in test_batches: data_batch, count_batch, mask = utils.fetch_data( test_set, test_count, idx_batch, vocab_size) input_feed = {model.x.name: data_batch, model.mask.name: mask,model.keep_prob.name: 1.0,model.warm_up.name: 1.0,model.min_alpha.name:min_alpha,model.B.name: B} loss, recon,kld,ana_loss,ana_kld = sess.run([model.objective, model.recons_loss,model.kld,model.analytical_objective,model.analytical_kld], input_feed) loss_sum += np.sum(loss) kld_sum += np.sum(kld)/np.sum(mask) ana_loss_sum += np.sum(ana_loss) ana_kld_sum += np.sum(ana_kld) / np.sum(mask) word_count += np.sum(count_batch) count_batch = np.add(count_batch, 1e-12) ppx_sum += np.sum(np.divide(loss, count_batch)) doc_count += np.sum(mask) recon_sum+=np.sum(recon) print_loss = recon_sum/len(test_batches) print_ppx = np.exp(loss_sum / word_count) print_ppx_perdoc = np.exp(ppx_sum / doc_count) print_kld = kld_sum/len(test_batches) print_ana_ppx = np.exp(ana_loss_sum / word_count) print_ana_kld = ana_kld_sum/len(train_batches) print('| Epoch test: {:d} |'.format(epoch+1), '| Perplexity: {:.9f}'.format(print_ppx), '| Per doc ppx: {:.5f}'.format(print_ppx_perdoc), '| KLD: {:.5}'.format(print_kld), '| Loss: {:.5}'.format(print_loss), '| ppx anal.: {:.5f}'.format(print_ana_ppx), '|KLD anal.: {:.5f}'.format(print_ana_kld))
def model(batch_size, lr, dims, numEpochs, cuda, alpha, pathLoad, pathSave, epochSave,
          activation, modelType, computeEigVectorsOnline, regularizerFcn, _seed, _run):
    """
    Function for creating and training MLPs on MNIST.

    :param batch_size: specifies the batch size
    :param lr: learning rate of the stochastic optimizer
    :param dims: a list of N tuples specifying the input and output sizes of the FC layers,
                 where the last layer is the output layer
    :param numEpochs: number of epochs to train the network for
    :param cuda: boolean that specifies whether to use the GPU or not
    :param alpha: weight for the regularizer on the spectra; if 0, the regularizer is not used
    :param pathLoad: path to where MNIST lives
    :param pathSave: path specifying where to save the models
    :param epochSave: integer specifying how often to save the loss
    :param activation: string that specifies whether to use relu or not
    :param _seed: seed for the RNG
    :param _run: Sacred object that logs the relevant data and stores it in a database
    :param computeEigVectorsOnline: online or offline eigenvector estimator
    :param regularizerFcn: name of the function that computes the discrepancy between the
                           idealized and empirical eigenvalues
    """
    device = 'cuda' if cuda else 'cpu'
    os.makedirs(pathSave, exist_ok=True)
    npr.seed(_seed)
    torch.manual_seed(_seed + 1)
    alpha = alpha * torch.ones(1, device=device)

    # Load in MNIST
    fracVal = 0.1
    train, val, test = split_mnist(pathLoad, fracVal)
    trainData, trainLabels = train[0], train[1]
    valData, valLabels = val[0], val[1]
    testData, testLabels = test[0], test[1]
    numSamples = trainData.shape[0]

    if modelType == 'mlp':
        model = MLP(dims, activation=activation)  # create an MLP object
    elif modelType == 'cnn':
        model = CNN(dims, activation=activation)  # create a CNN object
    else:
        # Fail fast instead of crashing later on an undefined `model`.
        raise ValueError('WOAHHHHH RELAX: unknown modelType {}'.format(modelType))
    model = model.to(device)

    lossFunction = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # Objects used to store performance metrics while the network is training
    trainSpectra = []      # (estimated) spectra of the network at the end of each epoch
    trainLoss = []         # training loss (reported at the end of each epoch on the last batch)
    trainRegularizer = []  # value of the regularizer during training
    valLoss = []           # validation loss
    valRegularizer = []    # validation regularizer

    # Sample indices for the eigenvector estimates all at once
    eigBatchIdx = npr.randint(numSamples, size=(numEpochs + 1, batch_size))

    # Get an initial estimate of the eigenvectors and check the loss
    with torch.no_grad():
        model.eigVec, loss, spectraTemp, regul = computeEigVectors(
            model, trainData[eigBatchIdx[0, :], :], trainLabels[eigBatchIdx[0, :]],
            lossFunction, alpha=alpha, cuda=cuda)
        trainSpectra.append(spectraTemp)        # store the computed eigenspectra
        trainLoss.append(loss.cpu().item())     # store the training loss
        _run.log_scalar("trainLoss", loss.item())
        _run.log_scalar("trainRegularizer", float(alpha * regul))
        trainRegularizer.append(alpha * regul)  # store the value of the regularizer

    # Check on the validation set
    loss, regul = compute_loss(model, valData, valLabels, lossFunction, alpha, cuda=cuda)
    valLoss.append(loss.item())
    _run.log_scalar("valLoss", loss.item())
    valRegularizer.append(regul)

    prevVal = loss.item() + alpha * regul.item()  # used for early stopping
    prevModel = copy.deepcopy(model)
    patience = 0
    howMuchPatience = 4

    # Train that bad boy!
    for epoch in tqdm(range(numEpochs), desc="Epochs", ascii=True, position=0, leave=False):
        batches = create_batches(batch_size=batch_size, numSamples=numSamples)  # indices for batches
        for batch in tqdm(batches, desc='Train Batches', ascii=True, position=1, leave=False):
            optimizer.zero_grad()
            # Compute a forward pass through the network
            loss, regul = compute_loss(model, trainData[batch, :], trainLabels[batch],
                                       lossFunction, alpha, cuda=cuda)
            lossR = loss + alpha * regul  # augmented loss function
            lossR.backward()              # backprop!
            optimizer.step()              # take a gradient step

        # Recompute the estimated eigenvectors
        with torch.no_grad():
            model.eigVec, loss, spectraTemp, regul = computeEigVectors(
                model, trainData[eigBatchIdx[epoch + 1, :], :],
                trainLabels[eigBatchIdx[epoch + 1, :]], lossFunction, alpha=alpha, cuda=cuda)
            trainSpectra.append(spectraTemp)     # store the computed eigenspectra
            trainLoss.append(loss.cpu().item())  # store the training loss
            _run.log_scalar("trainLoss", loss.item())
            trainRegularizer.append(alpha * regul)  # store the value of the regularizer

        if (epoch + 1) % epochSave == 0:
            # Check the early stopping condition
            loss, regul = compute_loss(model, valData, valLabels, lossFunction, alpha, cuda=cuda)
            currVal = loss.item() + alpha * regul.item()
            percentImprove = (currVal - prevVal) / prevVal
            if percentImprove > 0:
                if patience > howMuchPatience:
                    model = prevModel
                    break
                else:
                    patience += 1
            else:
                patience = 0
            prevVal = currVal
            prevModel = copy.deepcopy(model)  # save for early stopping
            valLoss.append(loss.item())
            _run.log_scalar("valLoss", loss.item())
            valRegularizer.append(regul.item())
            _run.log_scalar("valRegularizer", regul.item())

    # Check accuracy on the test set
    outputs = model(testData.to(device))
    softMax = nn.Softmax(dim=1)
    probs = softMax(outputs.cpu())
    numCorrect = torch.sum(torch.argmax(probs, dim=1) == testLabels).detach().numpy() * 1.0
    testResult = numCorrect / testData.shape[0] * 100

    # Collect accuracy on the validation set
    outputs = model(valData.to(device))
    softMax = nn.Softmax(dim=1)
    probs = softMax(outputs).cpu()
    numCorrect = torch.sum(torch.argmax(probs, dim=1) == valLabels).detach().numpy() * 1.0
    valAcc = numCorrect / valData.shape[0] * 100
    _run.log_scalar("valAcc", valAcc.item())

    # Save everything for later analysis
    model_data = {'parameters': model.cpu().state_dict(),
                  'training': (trainLoss, trainRegularizer, trainSpectra),
                  'val': (valLoss, valRegularizer, valAcc),
                  'test': testResult}
    if modelType == 'cnn':
        dims = dims[1:]  # the first entry is the number of convolutional layers
    path = pathSave + modelType + '_' + activation + '_hidden=('
    for idx in range(len(dims) - 1):
        path = path + str(dims[idx][1]) + ','
    path = path + str(dims[-1][1]) + ')_lr=' + str(lr) + '_alpha=' + str(alpha) + '_batch_size=' \
           + str(batch_size) + '_seed=' + str(_seed) + '_epochs=' + str(numEpochs)
    torch.save(model_data, path)
    _run.add_artifact(path, "model_data.pt",
                      content_type="application/octet-stream")  # saves the data dump as model_data
    # os.system('ls -l --block-size=M {}'.format(path))
    # shutil.rmtree(pathSave)

    # Return the validation accuracy for model comparison and selection
    return valAcc
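# Usage sketch (not from the original source): how the `model` function above can be wired
# into a Sacred experiment. The experiment name and every config value below are assumptions
# made for illustration; only the captured function's parameter names come from the snippet.
from sacred import Experiment

ex = Experiment('spectral_regularizer_mnist')  # hypothetical experiment name

@ex.config
def cfg():
    batch_size = 128
    lr = 1e-3
    dims = [(784, 256), (256, 10)]   # two FC layers: input -> hidden -> output
    numEpochs = 20
    cuda = False
    alpha = 0.0                      # 0 disables the spectral regularizer
    pathLoad = 'data/mnist.pt'       # assumed MNIST location
    pathSave = 'results/'
    epochSave = 1
    activation = 'relu'
    modelType = 'mlp'
    computeEigVectorsOnline = False
    regularizerFcn = 'l2'            # assumed name of the spectral discrepancy function

ex.main(model)  # register the training function above as the experiment's main command

if __name__ == '__main__':
    ex.run()  # Sacred injects the config plus fresh _seed and _run objects into `model`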
            cnn.input_x: x_batch,
            cnn.input_y: y_batch,
            cnn.dropout_keep_prob: 1.0
        }
        step, summaries, loss, accuracy = sess.run(
            [global_step, dev_summary_op, cnn.loss, cnn.accuracy], feed_dict)
        time_str = datetime.datetime.now().isoformat()
        print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
        if writer:
            writer.add_summary(summaries, step)

    # Generate batches
    print("generating batches")
    batches = utils.create_batches(X_train, Y_train, FLAGS.batch_size, FLAGS.num_epochs)
    print("training")
    # Training loop. For each batch...
    for batch in batches:
        x_batch, y_batch = batch
        train_step(x_batch, y_batch)
        current_step = tf.train.global_step(sess, global_step)
        if current_step % FLAGS.evaluate_every == 0:
            print("\nEvaluation:")
            dev_step(X_test, Y_true, writer=dev_summary_writer)
            print("")
        if current_step % FLAGS.checkpoint_every == 0:
            path = saver.save(sess, checkpoint_prefix, global_step=current_step)
            print("Saved model checkpoint to {}\n".format(path))
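# A plausible shape for the `utils.create_batches` generator consumed by the training loop
# above (a sketch under assumptions, not the repository's actual implementation): shuffle the
# data each epoch and yield (x_batch, y_batch) tuples for `num_epochs` passes over the data.
import numpy as np

def create_batches(x, y, batch_size, num_epochs, shuffle=True):
    x, y = np.asarray(x), np.asarray(y)
    n = len(x)
    num_batches = int(np.ceil(n / batch_size))
    for _ in range(num_epochs):
        idx = np.random.permutation(n) if shuffle else np.arange(n)
        for b in range(num_batches):
            sel = idx[b * batch_size:(b + 1) * batch_size]
            yield x[sel], y[sel]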
def train_x(self, dev_set_with_lab, dev_set_without_lab, dev_set_y,
            train_set_with_lab, train_set_without_lab, train_set_y,
            test_set, test_set_y, to_label, model_name,  # 10
            lexicon=[], warm_up_period=100, n_dropout_rounds=100,
            max_learning_iterations=100, min_learning_iterations=35,
            no_improvement_iterations=15, semi_supervised=True,
            debug=True, it=0):
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    is_training = False
    dev_batches_with_lab = utils.create_batches(len(dev_set_y), self.batch_size, shuffle=False)
    dev_batches_without_lab = utils.create_batches(len(dev_set_without_lab), self.batch_size, shuffle=False)
    test_batches = utils.create_batches(len(test_set_y), self.batch_size, shuffle=False)
    labeled_training_rounds = math.ceil(float(len(train_set_without_lab)) / float(len(train_set_with_lab)))
    labeled_dev_rounds = math.ceil(float(len(dev_set_without_lab)) / float(len(dev_set_with_lab)))
    # train_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/train', sess.graph)
    warm_up = 0
    min_alpha = 0.001
    best_print_ana_ppx = 1e10  # must be initialized before the loop (was commented out in the original)
    no_improvement_iters = 0
    stopped = False
    epoch = -1
    while not stopped:
        epoch += 1
        train_batches_with_lab = utils.create_batches(len(train_set_with_lab), self.batch_size, shuffle=True)
        train_batches_without_lab = utils.create_batches(len(train_set_without_lab), self.batch_size, shuffle=True)
        if warm_up < 1.:
            warm_up += 1. / warm_up_period
        else:
            warm_up = 1.
        optim = self.optim_all
        self.run_model(labeled_training_rounds, train_batches_with_lab, train_set_with_lab,
                       train_set_y, train_batches_without_lab, train_set_without_lab,
                       debug, semi_supervised, epoch, warm_up, min_alpha, sess,
                       optim=optim, keep_prop=0.75, print_statement="training", training=True)
        print_ana_loss, print_loss1, f1_measure, _ = self.run_model(
            labeled_dev_rounds, dev_batches_with_lab, dev_set_with_lab, dev_set_y,
            dev_batches_without_lab, dev_set_without_lab, debug, semi_supervised,
            epoch, warm_up, min_alpha, sess, optim=None, keep_prop=1.0,
            print_statement="dev", training=is_training)
        if print_ana_loss < best_print_ana_ppx:
            no_improvement_iters = 0
            best_print_ana_ppx = print_ana_loss
            # tf.train.Saver().save(sess, model_name + '/improved_model')
        else:
            no_improvement_iters += 1
            # print("No improvement: ", no_improvement_iters)
            if no_improvement_iters >= no_improvement_iterations:
                break
        # -------------------------------
        # test
        if debug:
            self.run_model(1, test_batches, test_set, test_set_y, [], None, debug,
                           semi_supervised, epoch, warm_up, min_alpha, sess, optim=None,
                           keep_prop=1.0, print_statement="TEST", training=is_training)
    # print("load best dev f1 model...")
    # tf.train.Saver().restore(sess, model_name + '/improved_model')
    _, _, f1_measure, test_pred = self.run_model(1, test_batches, test_set, test_set_y,
                                                 [], None, debug, semi_supervised, epoch,
                                                 warm_up, min_alpha, sess, optim=None,
                                                 keep_prop=1.0, print_statement="test",
                                                 training=is_training)
    data_batch, mask = utils.fetch_data_without_idx_new(to_label, self.vocab_size)
    data_batch_y = utils.fetch_data_y_dummy(to_label, self.n_class, 0)
    input_feed = {
        self.x.name: data_batch,
        self.y.name: data_batch_y,
        self.mask.name: mask,
        self.keep_prob.name: 0.75,
        self.warm_up.name: warm_up,
        self.min_alpha.name: min_alpha,
        self.prob: 0.75,
        self.lab: np.zeros((1)),
        self.idx.name: np.zeros((1), dtype=np.int32),
        self.training.name: is_training
    }
    # Keep dropout active and run the prediction op repeatedly (Monte-Carlo dropout).
    prediction = [sess.run(([self.out_y]), input_feed) for _ in range(n_dropout_rounds)]
    return f1_measure, prediction, test_pred
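# Aggregation sketch (not from the original source): the `prediction` list returned by
# train_x holds n_dropout_rounds stochastic forward passes, so averaging them gives a
# Monte-Carlo dropout estimate. The (n_docs, n_class) output shape is an assumption, and the
# random array below is only a stand-in for a real `prediction` list.
import numpy as np

n_dropout_rounds, n_docs, n_class = 100, 5, 3
prediction = [[np.random.rand(n_docs, n_class)] for _ in range(n_dropout_rounds)]  # stand-in

mc_samples = np.stack([p[0] for p in prediction], axis=0)  # (rounds, docs, classes)
mc_mean = mc_samples.mean(axis=0)   # Monte-Carlo averaged class scores per document
mc_std = mc_samples.std(axis=0)     # predictive spread, usable as an uncertainty score
hard_labels = mc_mean.argmax(axis=1)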
def train(hparams, model_design, X, Y, data, data_dir="models/mlp", splits=5):
    """Train an MLP with k-fold cross-validation and return the per-fold running losses."""
    epochs = hparams["epochs"]
    kf = KFold(n_splits=splits, shuffle=False)
    kf.get_n_splits(X)

    # rmse_train = np.zeros((splits, epochs))
    # rmse_val = np.zeros((splits, epochs))
    mae_train = np.zeros((splits, epochs))
    mae_val = np.zeros((splits, epochs))

    i = 0
    # performance = []
    # y_tests = []
    # y_preds = []
    for train_index, test_index in kf.split(X):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = Y[train_index], Y[test_index]
        X_test = torch.tensor(X_test).type(dtype=torch.float)
        y_test = torch.tensor(y_test).type(dtype=torch.float)
        X_train = torch.tensor(X_train).type(dtype=torch.float)
        y_train = torch.tensor(y_train).type(dtype=torch.float)

        model = models.MLP(model_design["layer_sizes"])
        optimizer = optim.Adam(model.parameters(), lr=hparams["learningrate"])
        criterion = nn.MSELoss()
        # early_stopping = utils.EarlyStopping()

        for epoch in range(epochs):
            # Training
            model.train()
            x, y = utils.create_batches(X_train, y_train, hparams["batchsize"], hparams["history"])
            x = torch.tensor(x).type(dtype=torch.float)
            y = torch.tensor(y).type(dtype=torch.float)
            output = model(x)

            # Compute training loss
            loss = criterion(output, y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Evaluate the current model on the held-out fold
            model.eval()
            with torch.no_grad():
                pred_train = model(X_train)
                pred_test = model(X_test)
                # rmse_train[i, epoch] = utils.rmse(y_train, pred_train)
                # rmse_val[i, epoch] = utils.rmse(y_test, pred_test)
                val_loss = metrics.mean_absolute_error(y_test, pred_test)
                # early_stopping(val_loss)
                # if early_stopping.early_stop:
                #     break
                mae_train[i, epoch] = metrics.mean_absolute_error(y_train, pred_train)
                mae_val[i, epoch] = val_loss

        # Predict with fitted model
        # with torch.no_grad():
        #     preds_train = model(X_train)
        #     preds_test = model(X_test)
        #     performance.append([utils.rmse(y_train, preds_train),
        #                         utils.rmse(y_test, preds_test),
        #                         metrics.mean_absolute_error(y_train, preds_train.numpy()),
        #                         metrics.mean_absolute_error(y_test, preds_test.numpy())])

        torch.save(model.state_dict(), os.path.join(data_dir, f"{data}_model{i}.pth"))
        # y_tests.append(y_test.numpy())
        # y_preds.append(preds_test.numpy())
        i += 1

    running_losses = {"mae_train": mae_train, "mae_val": mae_val}
    # also: "rmse_val": rmse_val, "rmse_train": rmse_train

    return running_losses  # , performance, y_tests, y_preds
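# Usage sketch (hypothetical data and hyperparameters, not from the original source): how the
# cross-validated `train` above might be called. The hparams keys and layer sizes mirror what
# the function reads; the synthetic arrays stand in for the real dataset.
import os
import numpy as np

hparams = {"epochs": 100, "learningrate": 1e-3, "batchsize": 32, "history": 1}
model_design = {"layer_sizes": [7, 32, 16, 1]}   # assumed MLP layout: 7 inputs -> 1 output
X = np.random.rand(500, 7).astype(np.float32)    # synthetic stand-in for the real features
Y = np.random.rand(500, 1).astype(np.float32)

os.makedirs("models/mlp", exist_ok=True)         # train() saves one checkpoint per fold here
losses = train(hparams, model_design, X, Y, data="synthetic", data_dir="models/mlp", splits=5)
print(losses["mae_val"].mean(axis=0)[-1])        # mean validation MAE at the final epoch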
"-f", "--restore_file", default="./source_blstm_crf/source_model_crf", help="Path to rebuild the model and restore weights from checkpoint" ) opts = optparser.parse_args()[0] batch_size = config.batch_size word2vec_emb_path = config.word2vec_emb_path glove_emb_path = config.glove_emb_path input_x, input_y = loader.prepare_input(config.datadir+config.train) if opts.char: char_emb, char_to_id, char_seq_len = utils.convert_to_char_emb(input_x) char_layer = BLSTM(config.char_lstm_size) emb_layer = Embedding(opts, word2vec_emb_path, glove_emb_path) seqlen, input_x = utils.convert_to_id(input_x, emb_layer.word_to_id) input_y, tag_to_id = utils.create_and_convert_tag_to_id(input_y) seqlen, inp = utils.create_batches(input_x, seqlen, input_y) num_labels = len(tag_to_id) lstm_size = 100 blstm_layer = BLSTM(lstm_size) ff_layer = FeedForward(2*config.lstm_size, num_labels) if opts.char: #dimension of batch and sequence_len are collapsed as batch_size is 1. char_inp = tf.placeholder("float32", shape=[None, None, len(char_to_id)], name="char_input") char_seqlen = tf.placeholder("int32", shape=[None], name="char_seqlen") batch_input = tf.placeholder("int32", shape=[None, None], name="input") sequence_length = tf.placeholder("int32", shape=[None], name="seqlen") if opts.crf: labels = tf.placeholder("int32", shape=[None, None], name="labels") else: