def predict(model, modules, consts, options):
    """Decode the pickled test set ``ibm.pkl`` with ``model``.

    The four output directories are rebuilt with ``rebuild_dir`` first. Each
    batch is sorted with ``sort_samples``, encoded with ``model.encode`` and
    then decoded either sample by sample with ``beam_decode_copy`` (when
    ``options["beam_decoding"]`` is truthy) or as a whole batch with
    ``greedy_decode``.

    Args:
        model: object providing ``eval()`` and ``encode(x, len_x, x_mask)``.
        modules: passed through to ``datar.get_data`` and the decoders.
        consts: must contain ``"testing_print_size"``.
        options: must contain ``"beam_decoding"``; ``"has_y"`` is overwritten
            with ``TESTING_DATASET_CLS.HAS_Y``.
    """
    print("start predicting,")
    # Put the module in evaluation mode, as the other predict() variants in
    # this file do; the original left it in whatever mode the caller had set.
    model.eval()
    options["has_y"] = TESTING_DATASET_CLS.HAS_Y
    if options["beam_decoding"]:
        print("using beam search")
    else:
        print("using greedy search")
    rebuild_dir(cfg.cc.BEAM_SUMM_PATH)
    rebuild_dir(cfg.cc.BEAM_GT_PATH)
    rebuild_dir(cfg.cc.GROUND_TRUTH_PATH)
    rebuild_dir(cfg.cc.SUMM_PATH)

    print("loading test set...")
    # Pickle data is binary: open with "rb" and close the handle deterministically.
    with open(cfg.cc.TESTING_DATA_PATH + "ibm.pkl", "rb") as data_file:
        xy_list = pickle.load(data_file)
    batch_list, num_files, num_batches = datar.batched(len(xy_list), options, consts)

    print("num_files = ", num_files, ", num_batches = ", num_batches)

    partial_num = 0  # samples decoded since the last progress message
    total_num = 0    # samples decoded overall
    si = 0           # running sample index handed to the beam decoder
    for idx_batch in range(num_batches):
        test_idx = batch_list[idx_batch]
        batch_raw = [xy_list[xy_idx] for xy_idx in test_idx]
        batch = datar.get_data(batch_raw, modules, consts, options)

        x, len_x, x_mask, y, len_y, y_mask, oy, x_ext, y_ext, oovs = sort_samples(
            batch.x, batch.len_x, batch.x_mask, batch.y, batch.len_y, batch.y_mask,
            batch.original_summarys, batch.x_ext, batch.y_ext, batch.x_ext_words)

        word_emb, dec_state = model.encode(
            torch.LongTensor(x).cuda(),
            torch.LongTensor(len_x).cuda(),
            torch.FloatTensor(x_mask).cuda())

        if options["beam_decoding"]:
            # Beam search handles one sample (column idx_s) at a time.
            for idx_s in range(word_emb.size(1)):
                inputx = (torch.LongTensor(x_ext[:, idx_s]).cuda(),
                          word_emb[:, idx_s, :],
                          dec_state[idx_s, :],
                          torch.FloatTensor(x_mask[:, idx_s, :]).cuda(),
                          y[:, idx_s],
                          [len_y[idx_s]],
                          oy[idx_s],
                          batch.max_ext_len,
                          oovs[idx_s])
                beam_decode_copy(si, inputx, model, modules, consts, options)
                si += 1
        else:
            inputx = (word_emb, dec_state, torch.FloatTensor(x_mask).cuda(), y, len_y)
            greedy_decode(test_idx, inputx, model, modules, consts, options)

        testing_batch_size = len(test_idx)
        partial_num += testing_batch_size
        total_num += testing_batch_size
        if partial_num >= consts["testing_print_size"]:
            print(total_num, "summs are generated")
            partial_num = 0
    print(si, total_num)
def run(existing_model_name=None, w=64):
    """Train the model or run prediction, selected by ``options["is_predicting"]``.

    In this variant a checkpoint is loaded in both modes. Training runs up to
    ``consts["max_epoch"]`` epochs, saves a ``.best_model_baseline`` checkpoint
    whenever the summed epoch cost improves, stops at the first epoch that
    does not improve, and finally writes a ``.final`` checkpoint.

    Args:
        existing_model_name: checkpoint file name below ``cfg.cc.MODEL_PATH``;
            ``None`` selects ``"cnndm.s2s.transformer.gpu0.epoch27.2"``.
        w: not used by this function; kept so existing callers keep working.
    """
    modules, consts, options = init_modules()

    if options["is_predicting"]:
        need_load_model = True
        training_model = False
        predict_model = True
    else:
        # Training also starts from an existing checkpoint in this variant.
        need_load_model = True
        training_model = True
        predict_model = False

    print_basic_info(modules, consts, options)

    if training_model:
        print("loading train set...")
        if options["is_debugging"]:
            data_path = cfg.cc.TESTING_DATA_PATH + "test.pkl"
        else:
            data_path = cfg.cc.TRAINING_DATA_PATH + "train.pkl"
        # Context manager so the pickle file handle is not leaked.
        with open(data_path, "rb") as data_file:
            xy_list = pickle.load(data_file)
        batch_list, num_files, num_batches = datar.batched(len(xy_list), options, consts)
        print("num_files = ", num_files, ", num_batches = ", num_batches)

    running_start = time.time()

    print("compiling model ...")
    model = Model(modules, consts, options)
    if options["cuda"]:
        model.cuda()
    optimizer = torch.optim.Adagrad(model.parameters(), lr=consts["lr"],
                                    initial_accumulator_value=0.1)

    model_name = "".join(["cnndm.s2s.", options["cell"]])
    existing_epoch = 0
    if need_load_model:
        if existing_model_name is None:
            existing_model_name = "cnndm.s2s.transformer.gpu0.epoch27.2"
        print("loading existed model:", existing_model_name)
        model, optimizer = load_model(cfg.cc.MODEL_PATH + existing_model_name,
                                      model, optimizer)

    if training_model:
        print("start training model ")
        model.train()
        # Number of samples between two progress messages.
        if num_files >= consts["print_time"]:
            print_size = num_files // consts["print_time"]
        else:
            print_size = num_files

        device = options["device"]
        last_total_error = float("inf")
        print("max epoch:", consts["max_epoch"])
        for epoch in range(0, consts["max_epoch"]):
            print("epoch: ", epoch + existing_epoch)
            num_partial = 1
            total_error = 0.0
            error_c = 0.0  # never updated in this variant; only reported
            partial_num_files = 0
            epoch_start = time.time()
            partial_start = time.time()
            # shuffle the trainset
            batch_list, num_files, num_batches = datar.batched(len(xy_list), options, consts)
            used_batch = 0.
            for idx_batch in range(num_batches):
                train_idx = batch_list[idx_batch]
                batch_raw = [xy_list[xy_idx] for xy_idx in train_idx]
                # Only full-size batches are trained on.
                if len(batch_raw) != consts["batch_size"]:
                    continue
                local_batch_size = len(batch_raw)
                batch = datar.get_data(batch_raw, modules, consts, options)

                model.zero_grad()
                y_pred, cost = model(
                    torch.LongTensor(batch.x).to(device),
                    torch.LongTensor(batch.y_inp).to(device),
                    torch.LongTensor(batch.y).to(device),
                    torch.FloatTensor(batch.x_mask).to(device),
                    torch.FloatTensor(batch.y_mask).to(device),
                    torch.LongTensor(batch.x_ext).to(device),
                    torch.LongTensor(batch.y_ext).to(device),
                    batch.max_ext_len)
                cost.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), consts["norm_clip"])
                optimizer.step()

                cost = cost.item()
                total_error += cost
                used_batch += 1
                partial_num_files += consts["batch_size"]
                if partial_num_files // print_size == 1 and idx_batch < num_batches:
                    print(idx_batch + 1, "/", num_batches, "batches have been processed,",
                          "average cost until now:", "cost =", total_error / used_batch, ",",
                          "cost_c =", error_c / used_batch, ",",
                          "time:", time.time() - partial_start)
                    partial_num_files = 0
                    # Intermediate checkpointing was disabled (commented out) in
                    # the original; only the partial counter is advanced.
                    num_partial += 1

            print("in this epoch, total average cost =", total_error / used_batch, ",",
                  "cost_c =", error_c / used_batch, ",",
                  "time:", time.time() - epoch_start)

            print_sent_dec(y_pred, batch.y, batch.y_mask, batch.x_ext_words,
                           modules, consts, options, local_batch_size)

            if last_total_error > total_error or options["is_debugging"]:
                last_total_error = total_error
                if not options["is_debugging"]:
                    print("save model... ")
                    file_name = model_name + ".gpu" + str(consts["idx_gpu"]) + ".best_model_baseline"
                    save_model(cfg.cc.MODEL_PATH + file_name, model, optimizer)
                    if options["fire"]:
                        shutil.move(cfg.cc.MODEL_PATH + file_name, "/out/")
                    print("finished")
            else:
                # The summed epoch cost did not improve: stop training.
                print("optimization finished")
                break

        print("save final model... ")
        file_name = (model_name + ".final.gpu" + str(consts["idx_gpu"])
                     + ".epoch" + str(epoch // consts["save_epoch"] + existing_epoch)
                     + "." + str(num_partial))
        save_model(cfg.cc.MODEL_PATH + file_name, model, optimizer)
        if options["fire"]:
            shutil.move(cfg.cc.MODEL_PATH + file_name, "/out/")
        print("finished")
    else:
        print("skip training model")

    if predict_model:
        predict(model, modules, consts, options)
    print("Finished, time:", time.time() - running_start)
def predict(model, modules, consts, options):
    """Beam-decode the validation or test set with ``model``.

    Loads ``pj1000.pkl`` from ``cfg.cc.VALIDATE_DATA_PATH`` when
    ``options["model_selection"]`` is truthy, otherwise ``test.pkl`` from
    ``cfg.cc.TESTING_DATA_PATH``. Every batch is encoded once and each sample
    is then passed to ``beam_decode``. Greedy decoding is not implemented
    here: without ``options["beam_decoding"]`` the batches are only counted.

    Args:
        model: object providing ``eval()`` and ``encode(x)``.
        modules: passed through to ``datar.get_data`` and ``beam_decode``.
        consts: must contain ``"testing_print_size"``.
        options: reads ``"beam_decoding"``, ``"model_selection"``, ``"copy"``
            and ``"device"``; ``"has_y"`` is overwritten.
    """
    print("start predicting,")
    model.eval()
    options["has_y"] = TESTING_DATASET_CLS.HAS_Y
    if options["beam_decoding"]:
        print("using beam search")
    else:
        print("using greedy search")
    rebuild_dir(cfg.cc.BEAM_SUMM_PATH)
    rebuild_dir(cfg.cc.BEAM_GT_PATH)
    rebuild_dir(cfg.cc.GROUND_TRUTH_PATH)
    rebuild_dir(cfg.cc.SUMM_PATH)

    print("loading test set...")
    if options["model_selection"]:
        data_path = cfg.cc.VALIDATE_DATA_PATH + "pj1000.pkl"
    else:
        data_path = cfg.cc.TESTING_DATA_PATH + "test.pkl"
    # Context manager so the pickle file handle is not leaked.
    with open(data_path, "rb") as data_file:
        xy_list = pickle.load(data_file)
    batch_list, num_files, num_batches = datar.batched(len(xy_list), options, consts)

    print("num_files = ", num_files, ", num_batches = ", num_batches)

    device = options["device"]
    partial_num = 0  # samples seen since the last progress message
    total_num = 0    # samples seen overall
    si = 0           # running sample index handed to beam_decode
    for idx_batch in range(num_batches):
        test_idx = batch_list[idx_batch]
        batch_raw = [xy_list[xy_idx] for xy_idx in test_idx]
        batch = datar.get_data(batch_raw, modules, consts, options)

        assert len(test_idx) == batch.x.shape[1]  # local_batch_size

        word_emb, padding_mask = model.encode(torch.LongTensor(batch.x).to(device))

        if options["beam_decoding"]:
            for idx_s in range(len(test_idx)):
                if options["copy"]:
                    inputx = (torch.LongTensor(batch.x_ext[:, idx_s]).to(device),
                              torch.FloatTensor(batch.x_mask[:, idx_s, :]).to(device),
                              word_emb[:, idx_s, :],
                              padding_mask[:, idx_s],
                              batch.y[:, idx_s],
                              [batch.len_y[idx_s]],
                              batch.original_summarys[idx_s],
                              batch.max_ext_len,
                              batch.x_ext_words[idx_s])
                else:
                    inputx = (torch.LongTensor(batch.x[:, idx_s]).to(device),
                              word_emb[:, idx_s, :],
                              padding_mask[:, idx_s],
                              batch.y[:, idx_s],
                              [batch.len_y[idx_s]],
                              batch.original_summarys[idx_s])

                beam_decode(si, inputx, model, modules, consts, options)
                si += 1
        else:
            pass  # greedy_decode() is not wired up in this variant

        testing_batch_size = len(test_idx)
        partial_num += testing_batch_size
        total_num += testing_batch_size
        if partial_num >= consts["testing_print_size"]:
            print(total_num, "summs are generated")
            partial_num = 0
    print(si, total_num)
def run(existing_model_name=None):
    """Train the model or run prediction, selected by ``options["is_predicting"]``.

    Prediction loads a checkpoint and calls ``predict``. Training starts from
    a fresh ``Model``, saves a checkpoint at every progress interval and after
    every improving epoch (unless ``options["is_debugging"]``), stops at the
    first epoch whose summed cost does not improve, and then saves a final
    checkpoint.

    Args:
        existing_model_name: checkpoint file name below ``cfg.cc.MODEL_PATH``
            used for prediction; ``None`` selects ``"cnndm.s2s.gpu5.epoch5.5"``.
    """
    modules, consts, options = init_modules()

    if options["is_predicting"]:
        need_load_model = True
        training_model = False
        predict_model = True
    else:
        need_load_model = False
        training_model = True
        predict_model = False

    print_basic_info(modules, consts, options)

    if training_model:
        print("loading train set...")
        if options["is_debugging"]:
            data_path = cfg.cc.TESTING_DATA_PATH + "test.pkl"
        else:
            data_path = cfg.cc.TRAINING_DATA_PATH + "train.pkl"
        # Pickle data is binary: open with "rb" and close the handle deterministically.
        with open(data_path, "rb") as data_file:
            xy_list = pickle.load(data_file)
        batch_list, num_files, num_batches = datar.batched(len(xy_list), options, consts)
        print("num_files = ", num_files, ", num_batches = ", num_batches)

    running_start = time.time()

    print("compiling model ...")
    model = Model(modules, consts, options)
    if options["cuda"]:
        model.cuda()
    optimizer = torch.optim.Adagrad(model.parameters(), lr=consts["lr"],
                                    initial_accumulator_value=0.1)

    model_name = "cnndm.s2s"
    existing_epoch = 0
    if need_load_model:
        if existing_model_name is None:
            existing_model_name = "cnndm.s2s.gpu5.epoch5.5"
        print("loading existed model:", existing_model_name)
        model, optimizer = load_model(cfg.cc.MODEL_PATH + existing_model_name,
                                      model, optimizer)

    if training_model:
        print("start training model ")
        # Floor division keeps the integer semantics the Python 2 "/" had here.
        if num_files >= consts["print_time"]:
            print_size = num_files // consts["print_time"]
        else:
            print_size = num_files

        def checkpoint_path(prefix, epoch_idx, partial_idx):
            # Shared naming scheme for all checkpoints written below.
            return (cfg.cc.MODEL_PATH + prefix + str(consts["idx_gpu"])
                    + ".epoch" + str(epoch_idx // consts["save_epoch"] + existing_epoch)
                    + "." + str(partial_idx))

        last_total_error = float("inf")
        print("max epoch:", consts["max_epoch"])
        for epoch in range(0, consts["max_epoch"]):
            # (A learning-rate decay at epoch 5 was disabled in the original.)
            print("epoch: ", epoch + existing_epoch)
            num_partial = 1
            total_error = 0.0
            partial_num_files = 0
            epoch_start = time.time()
            partial_start = time.time()
            # shuffle the trainset
            batch_list, num_files, num_batches = datar.batched(len(xy_list), options, consts)
            used_batch = 0.
            for idx_batch in range(num_batches):
                train_idx = batch_list[idx_batch]
                batch_raw = [xy_list[xy_idx] for xy_idx in train_idx]
                # Only full-size batches are trained on.
                if len(batch_raw) != consts["batch_size"]:
                    continue
                local_batch_size = len(batch_raw)
                batch = datar.get_data(batch_raw, modules, consts, options)

                x, len_x, x_mask, y, len_y, y_mask, oy, x_ext, y_ext, oovs = sort_samples(
                    batch.x, batch.len_x, batch.x_mask, batch.y, batch.len_y, batch.y_mask,
                    batch.original_summarys, batch.x_ext, batch.y_ext, batch.x_ext_words)

                model.zero_grad()
                y_pred, cost = model(
                    torch.LongTensor(x).cuda(),
                    torch.LongTensor(len_x).cuda(),
                    torch.LongTensor(y).cuda(),
                    torch.FloatTensor(x_mask).cuda(),
                    torch.FloatTensor(y_mask).cuda(),
                    torch.LongTensor(x_ext).cuda(),
                    torch.LongTensor(y_ext).cuda(),
                    batch.max_ext_len,
                    None)
                cost.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), 5)
                optimizer.step()

                cost = cost.item()
                total_error += cost
                used_batch += 1
                partial_num_files += consts["batch_size"]
                if partial_num_files // print_size == 1 and idx_batch < num_batches:
                    print(idx_batch + 1, "/", num_batches, "batches have been processed,",
                          "average cost until now:", "cost =", total_error / used_batch, ",",
                          "time:", time.time() - partial_start)
                    partial_num_files = 0
                    if not options["is_debugging"]:
                        print("save model... ", end=" ")
                        save_model(checkpoint_path(model_name + ".gpu", epoch, num_partial),
                                   model, optimizer)
                        print("finished")
                    num_partial += 1

            print("in this epoch, total average cost =", total_error / used_batch, ",",
                  "time:", time.time() - epoch_start)

            print_sent_dec(y_pred, y_ext, y_mask, oovs, modules, consts, options,
                           local_batch_size)

            if last_total_error > total_error or options["is_debugging"]:
                last_total_error = total_error
                if not options["is_debugging"]:
                    print("save model... ", end=" ")
                    save_model(checkpoint_path(model_name + ".gpu", epoch, num_partial),
                               model, optimizer)
                    print("finished")
            else:
                # The summed epoch cost did not improve: stop training.
                print("optimization finished")
                break

        print("save final model... ", end=" ")
        # NOTE(review): "final.gpu" has no leading dot (other variants in this
        # file use ".final.gpu"); kept unchanged so file names stay the same.
        save_model(checkpoint_path(model_name + "final.gpu", epoch, num_partial),
                   model, optimizer)
        print("finished")
    else:
        print("skip training model")

    if predict_model:
        predict(model, modules, consts, options)
    print("Finished, time:", time.time() - running_start)
def run():
    """Train (optionally resuming) or predict, driven by the global ``opt`` flags.

    A checkpoint is loaded when ``cfg.cc.MODEL_PATH`` is non-empty or when
    ``opt.predict`` / ``opt.retrain`` is set. With an empty ``opt.model_name``
    the file with the highest ``step<N>`` number in that directory is used and
    training fast-forwards through the first ``N`` steps without computing
    anything (only counters and seeds advance). Running loss statistics are
    accumulated per epoch and stored with every checkpoint.

    NOTE(review): the source of this function was whitespace-collapsed; the
    nesting of the ``retrain`` / ``coverage`` handling directly after
    ``load_model`` is a reconstruction and should be confirmed.
    """
    all_losses = []
    p_points = []
    continuing = False
    modules, consts, options = init_modules()

    print_basic_info(modules, consts, options)

    if not opt.predict:
        print("loading train set...")
        train_file = "train_small.pkl" if opt.debug else "train.pkl"
        # Pickle data is binary: open with "rb" and close the handle deterministically.
        with open(cfg.cc.TRAINING_DATA_PATH + train_file, "rb") as data_file:
            xy_list = pickle.load(data_file)
        batch_list, num_files, num_batches = datar.batched(len(xy_list), options, consts)
        print("num_files = ", num_files, ", num_batches = ", num_batches)

    running_start = time.time()

    print('model_path', cfg.cc.MODEL_PATH)
    # Any file in the model directory is treated as a checkpoint to resume from.
    continue_training = len(os.listdir(cfg.cc.MODEL_PATH)) != 0
    options['continue_training'] = continue_training
    print("compiling model ...")
    model = Model(modules, consts, options)
    if options["cuda"]:
        model.cuda()
    optimizer = torch.optim.Adagrad(model.parameters(), lr=consts["lr"],
                                    initial_accumulator_value=0.1)

    existing_epoch = 0
    if continue_training or opt.predict or opt.retrain:
        if opt.model_name == '':
            step_pattern = re.compile(r'.*step(\d+)')

            def step_of(file_name):
                # Step counter encoded in a checkpoint file name.
                return int(step_pattern.match(file_name).groups()[0])

            # Highest step wins (same pick as reversed(sorted(...))[0]).
            opt.model_name = sorted(os.listdir(cfg.cc.MODEL_PATH), key=step_of)[-1]
            continue_step = step_of(opt.model_name)
            name = cfg.cc.MODEL_PATH + opt.model_name
        else:
            continue_step = 0
            name = opt.model_name
        print("loading existed model:", name)
        (model, optimizer, all_losses, av_batch_losses,
         p_points, av_batch_p_points) = load_model(name, model, optimizer)
        if opt.retrain:
            # Retraining discards the statistics stored in the checkpoint.
            av_batch_losses = np.zeros(5)
            av_batch_p_points = np.zeros(1)
            all_losses = []
            p_points = []
        if options['coverage']:
            model.decoder.add_cov_weight()
            if options['cuda']:
                model.cuda()
            print(model)
        if opt.retrain:
            # update optimizer, because network contains now coverage weights
            # if coverage is on
            optimizer = torch.optim.Adagrad(model.parameters(), lr=consts["lr"],
                                            initial_accumulator_value=0.1)
        if continue_training and not opt.predict:
            continuing = True
            print('Continue training model from step {}'.format(continue_step))

    if not opt.predict:
        print("start training model ")
        # Floor division keeps the integer semantics the Python 2 "/" had here.
        if num_files >= consts["print_time"]:
            print_size = num_files // consts["print_time"]
        else:
            print_size = num_files
        steps = 0
        print(model)
        device = options["device"]
        loss_report = ("av_batchp_point {}, av_batch: total_loss {}, loss {}, "
                       "cost_cov {}, cost_p_point {}, cost_w_prior {}")

        print("max epoch:", consts["max_epoch"])
        for epoch in range(0, consts["max_epoch"]):
            print("epoch: ", epoch + existing_epoch)
            num_partial = 1
            if not continuing:
                av_batch_losses = np.zeros(5)
                av_batch_p_points = np.zeros(1)
            partial_num_files = 0
            epoch_start = time.time()
            partial_start = time.time()
            # shuffle the trainset
            batch_list, num_files, num_batches = datar.batched(len(xy_list), options, consts)
            used_batch = 0.
            y_pred = None
            for idx_batch in range(num_batches):
                if continue_training and steps <= continue_step:
                    # Fast-forward: replay the bookkeeping of an already
                    # trained step without touching the model.
                    used_batch += 1
                    init_seeds(steps)
                    steps += 1
                    partial_num_files += consts["batch_size"]
                    if partial_num_files % print_size == 0 and idx_batch < num_batches:
                        partial_num_files = 0
                        num_partial += 1
                    if steps == continue_step:
                        continuing = False
                    continue
                else:
                    continuing = False

                train_idx = batch_list[idx_batch]
                batch_raw = [xy_list[xy_idx] for xy_idx in train_idx]
                # Only full-size batches are trained on.
                if len(batch_raw) != consts["batch_size"]:
                    continue
                local_batch_size = len(batch_raw)
                batch = datar.get_data(batch_raw, modules, consts, options)

                x, len_x, x_mask, y, len_y, y_mask, oy, x_ext, y_ext, oovs = sort_samples(
                    batch.x, batch.len_x, batch.x_mask, batch.y, batch.len_y, batch.y_mask,
                    batch.original_summarys, batch.x_ext, batch.y_ext, batch.x_ext_words)

                model.zero_grad()
                # NOTE(review): tf is computed but never passed to the model.
                if opt.tf_schedule:
                    tf = teacher_forcing_ratio(steps, options["tf_offset_decay"])
                else:
                    tf = True

                y_pred, losses, p_point = model(
                    torch.LongTensor(x).to(device),
                    torch.LongTensor(len_x).to(device),
                    torch.LongTensor(y).to(device),
                    torch.FloatTensor(x_mask).to(device),
                    torch.FloatTensor(y_mask).to(device),
                    torch.LongTensor(x_ext).to(device),
                    torch.LongTensor(y_ext).to(device),
                    batch.max_ext_len)

                # Sum every loss component the model actually produced.
                total_loss = 0
                for loss_ in losses:
                    if loss_ is not None:
                        total_loss += loss_
                total_loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), consts["norm_clip"])
                optimizer.step()

                # Prepend the total loss, then turn tensors into plain numbers.
                losses = np.append(total_loss.item(), losses)
                losses = [
                    loss.cpu().detach().numpy() if isinstance(loss, torch.Tensor) else loss
                    for loss in losses
                ]
                av_batch_losses = np.add(av_batch_losses, losses)
                av_batch_p_points = np.add(av_batch_p_points, p_point)

                used_batch += 1
                partial_num_files += consts["batch_size"]
                if partial_num_files % print_size == 0 and idx_batch < num_batches:
                    # The original `print("Step: {}").format(steps)` only works
                    # as a Python 2 print statement.
                    print("Step: {}".format(steps))
                    print(idx_batch + 1, "/", num_batches, "batches have been processed,",
                          end=" ")
                    print(loss_report.format(av_batch_p_points / used_batch,
                                             *av_batch_losses / used_batch))
                    print("time:", time.time() - partial_start)
                    partial_num_files = 0
                    if not opt.debug:
                        print("save model... ", end=" ")
                        save_model(
                            cfg.cc.MODEL_PATH + "model.gpu" + str(consts["idx_gpu"])
                            + ".epoch" + str(epoch) + ".step" + str(steps),
                            model, optimizer, all_losses, av_batch_losses,
                            p_points, av_batch_p_points)
                        all_losses.append(av_batch_losses / used_batch)
                        p_points.append(av_batch_p_points / used_batch)
                        print("finished")
                    num_partial += 1
                init_seeds(steps)
                steps += 1

            if not continuing:
                print("in this epoch:")
                print(loss_report.format(av_batch_p_points / used_batch,
                                         *av_batch_losses / used_batch))
                print("time:", time.time() - epoch_start)
                if y_pred is not None:
                    print_sent_dec(y_pred, y_ext, y_mask, oovs, modules, consts, options,
                                   local_batch_size)

                if not opt.debug:
                    print("save model... ", end=" ")
                    # Context manager so the dump is flushed and closed.
                    with open(opt.result_path + '/losses_p_points.p', 'wb') as stats_file:
                        pickle.dump([all_losses, p_points], stats_file)
                    save_model(
                        cfg.cc.MODEL_PATH + "model.gpu" + str(consts["idx_gpu"])
                        + ".epoch" + str(epoch) + ".step" + str(steps),
                        model, optimizer, all_losses, av_batch_losses,
                        p_points, av_batch_p_points)
                    print("finished")

        if not opt.debug:
            print("save final model... ", end=" ")
            save_model(
                cfg.cc.MODEL_PATH + "model.final.gpu" + str(consts["idx_gpu"]),
                model, optimizer, all_losses, av_batch_losses,
                p_points, av_batch_p_points)
            with open(opt.result_path + '/losses_p_points.p', 'wb') as stats_file:
                pickle.dump([all_losses, p_points], stats_file)
            print("finished")
    else:
        print("skip training model")

    if opt.predict:
        predict(model, modules, consts, options)
    print("Finished, time:", time.time() - running_start)
def run(existing_model_name=None):
    """Train the model or run prediction, selected by ``options["is_predicting"]``.

    This variant wraps an Adam optimizer in the project's ``Optim`` class and
    accepts an optional second cost ``cost_c`` from the model, which is added
    to the main cost before back-propagation. Checkpoints are saved at every
    progress interval and after every improving epoch (unless
    ``options["is_debugging"]``); training stops at the first epoch whose
    summed main cost does not improve.

    Args:
        existing_model_name: checkpoint file name below ``cfg.cc.MODEL_PATH``
            used for prediction; ``None`` selects
            ``"cnndm.s2s.transformer.gpu4.epoch9.3"``.
    """
    modules, consts, options = init_modules()

    if options["is_predicting"]:
        need_load_model = True
        training_model = False
        predict_model = True
    else:
        need_load_model = False
        training_model = True
        predict_model = False

    print_basic_info(modules, consts, options)

    if training_model:
        print("loading train set...")
        if options["is_debugging"]:
            data_path = cfg.cc.VALIDATE_DATA_PATH + "pj1000.pkl"
        else:
            data_path = cfg.cc.TRAINING_DATA_PATH + "train.pkl"
        # Pickle data is binary: open with "rb" and close the handle deterministically.
        with open(data_path, "rb") as data_file:
            xy_list = pickle.load(data_file)
        batch_list, num_files, num_batches = datar.batched(len(xy_list), options, consts)
        print("num_files = ", num_files, ", num_batches = ", num_batches)

    running_start = time.time()

    print("compiling model ...")
    model = Model(modules, consts, options)
    if options["cuda"]:
        model.cuda()
    optimizer = Optim(consts["hidden_size"], 1, 8000,
                      torch.optim.Adam(model.parameters(), lr=consts["lr"],
                                       betas=(0.9, 0.998), eps=1e-9))

    model_name = "".join(["cnndm.s2s.", options["cell"]])
    existing_epoch = 0
    if need_load_model:
        if existing_model_name is None:
            existing_model_name = "cnndm.s2s.transformer.gpu4.epoch9.3"
        print("loading existed model:", existing_model_name)
        model, optimizer = load_model(cfg.cc.MODEL_PATH + existing_model_name,
                                      model, optimizer)

    if training_model:
        model.train()
        print("start training model ")
        # Floor division keeps the integer semantics the Python 2 "/" had here.
        if num_files >= consts["print_time"]:
            print_size = num_files // consts["print_time"]
        else:
            print_size = num_files

        def checkpoint_path(prefix, epoch_idx, partial_idx):
            # Shared naming scheme for all checkpoints written below.
            return (cfg.cc.MODEL_PATH + prefix + str(consts["idx_gpu"])
                    + ".epoch" + str(epoch_idx // consts["save_epoch"] + existing_epoch)
                    + "." + str(partial_idx))

        device = options["device"]
        last_total_error = float("inf")
        print("max epoch:", consts["max_epoch"])
        for epoch in range(0, consts["max_epoch"]):
            print("epoch: ", epoch + existing_epoch)
            num_partial = 1
            total_error = 0.0
            error_c = 0.0
            partial_num_files = 0
            epoch_start = time.time()
            partial_start = time.time()
            # shuffle the trainset
            batch_list, num_files, num_batches = datar.batched(len(xy_list), options, consts)
            used_batch = 0.
            for idx_batch in range(num_batches):
                train_idx = batch_list[idx_batch]
                batch_raw = [xy_list[xy_idx] for xy_idx in train_idx]
                # Only full-size batches are trained on.
                if len(batch_raw) != consts["batch_size"]:
                    continue
                local_batch_size = len(batch_raw)
                batch = datar.get_data(batch_raw, modules, consts, options)

                model.zero_grad()

                y_pred, cost, cost_c = model(
                    torch.LongTensor(batch.x).to(device),
                    torch.LongTensor(batch.px).to(device),
                    torch.LongTensor(batch.pxs).to(device),
                    torch.FloatTensor(batch.x_mask).to(device),
                    torch.LongTensor(batch.y_inp).to(device),
                    torch.LongTensor(batch.py).to(device),
                    torch.LongTensor(batch.pys).to(device),
                    torch.FloatTensor(batch.y_mask_tri).to(device),
                    torch.LongTensor(batch.y).to(device),
                    torch.FloatTensor(batch.y_mask).to(device),
                    torch.LongTensor(batch.x_ext).to(device),
                    torch.LongTensor(batch.y_ext).to(device),
                    batch.max_ext_len)

                if cost_c is None:
                    loss = cost
                else:
                    loss = cost + cost_c
                    cost_c = cost_c.item()
                    error_c += cost_c

                loss.backward()
                # Gradient clipping is disabled in this variant.
                optimizer.step()

                cost = cost.item()
                total_error += cost
                used_batch += 1
                partial_num_files += consts["batch_size"]
                if partial_num_files // print_size == 1 and idx_batch < num_batches:
                    print(idx_batch + 1, "/", num_batches, "batches have been processed,",
                          "average cost until now:", "cost =", total_error / used_batch, ",",
                          "cost_c =", error_c / used_batch, ",",
                          "time:", time.time() - partial_start)
                    partial_num_files = 0
                    if not options["is_debugging"]:
                        print("save model... ", end=" ")
                        save_model(checkpoint_path(model_name + ".gpu", epoch, num_partial),
                                   model, optimizer)
                        print("finished")
                    num_partial += 1

            print("in this epoch, total average cost =", total_error / used_batch, ",",
                  "cost_c =", error_c / used_batch, ",",
                  "time:", time.time() - epoch_start)

            print_sent_dec(y_pred, batch.y_ext, batch.y_mask, batch.x_ext_words,
                           modules, consts, options, local_batch_size)

            if last_total_error > total_error or options["is_debugging"]:
                last_total_error = total_error
                if not options["is_debugging"]:
                    print("save model... ", end=" ")
                    save_model(checkpoint_path(model_name + ".gpu", epoch, num_partial),
                               model, optimizer)
                    print("finished")
            else:
                # The summed epoch cost did not improve: stop training.
                print("optimization finished")
                break

        print("save final model... ", end=" ")
        save_model(checkpoint_path(model_name + ".final.gpu", epoch, num_partial),
                   model, optimizer)
        print("finished")
    else:
        print("skip training model")

    if predict_model:
        model.eval()
        predict(model, modules, consts, options)
    print("Finished, time:", time.time() - running_start)
def predict(model, modules, consts, options):
    """Decode the test set and record the per-batch sort order.

    Loads ``test_500.pkl`` when ``opt.debug`` is set, otherwise ``test.pkl``
    (both from ``cfg.cc.TESTING_DATA_PATH``). Each batch is sorted with
    ``sort_samples(..., return_idx=True)``; the returned index order is
    collected and pickled to ``<opt.output_dir>/test_batch_order.pkl`` after
    all batches were decoded. Decoding uses ``beam_decode`` per sample or
    ``greedy_decode`` per batch depending on ``options["beam_decoding"]``.

    Args:
        model: object providing ``eval()`` and ``encode(x, len_x, x_mask)``.
        modules: passed through to ``datar.get_data`` and the decoders.
        consts: must contain ``"testing_print_size"``.
        options: reads ``"beam_decoding"``, ``"copy"`` and ``"device"``;
            ``"has_y"`` is overwritten.
    """
    print("start predicting,")
    # Put the module in evaluation mode; the original left it in whatever
    # mode the caller had set.
    model.eval()
    options["has_y"] = TESTING_DATASET_CLS.HAS_Y
    if options["beam_decoding"]:
        print("using beam search")
    else:
        print("using greedy search")
    rebuild_dir(cfg.cc.BEAM_SUMM_PATH)
    rebuild_dir(cfg.cc.BEAM_GT_PATH)
    rebuild_dir(cfg.cc.GROUND_TRUTH_PATH)
    rebuild_dir(cfg.cc.SUMM_PATH)

    print("loading test set...")
    test_file = "test_500.pkl" if opt.debug else "test.pkl"
    # Pickle data is binary: open with "rb" and close the handle deterministically.
    with open(cfg.cc.TESTING_DATA_PATH + test_file, "rb") as data_file:
        xy_list = pickle.load(data_file)
    batch_list, num_files, num_batches = datar.batched(len(xy_list), options, consts)

    # Save order of batches for ngram overlap
    batches_sorted_idx = []

    print("num_files = ", num_files, ", num_batches = ", num_batches)

    device = options["device"]
    partial_num = 0  # samples decoded since the last progress message
    total_num = 0    # samples decoded overall
    si = 0           # running sample index
    for idx_batch in range(num_batches):
        test_idx = batch_list[idx_batch]
        batch_raw = [xy_list[xy_idx] for xy_idx in test_idx]
        batch = datar.get_data(batch_raw, modules, consts, options)

        assert len(test_idx) == batch.x.shape[1]  # local_batch_size

        (x, len_x, x_mask, y, len_y, y_mask, oy, x_ext, y_ext, oovs,
         batch_sorted_idx) = sort_samples(
            batch.x, batch.len_x, batch.x_mask, batch.y, batch.len_y, batch.y_mask,
            batch.original_summarys, batch.x_ext, batch.y_ext, batch.x_ext_words,
            return_idx=True)
        batches_sorted_idx.append(batch_sorted_idx)

        word_emb, dec_state = model.encode(
            torch.LongTensor(x).to(device),
            torch.LongTensor(len_x).to(device),
            torch.FloatTensor(x_mask).to(device))

        if options["beam_decoding"]:
            # Beam search handles one sample (column idx_s) at a time.
            for idx_s in range(len(test_idx)):
                if options["copy"]:
                    inputx = (torch.LongTensor(x_ext[:, idx_s]).to(device),
                              word_emb[:, idx_s, :],
                              dec_state[idx_s, :],
                              torch.FloatTensor(x_mask[:, idx_s, :]).to(device),
                              y[:, idx_s],
                              [len_y[idx_s]],
                              oy[idx_s],
                              batch.max_ext_len,
                              oovs[idx_s])
                else:
                    inputx = (torch.LongTensor(x[:, idx_s]).to(device),
                              word_emb[:, idx_s, :],
                              dec_state[idx_s, :],
                              torch.FloatTensor(x_mask[:, idx_s, :]).to(device),
                              y[:, idx_s],
                              [len_y[idx_s]],
                              oy[idx_s])
                beam_decode(si, inputx, model, modules, consts, options)
                si += 1
        else:
            if options["copy"]:
                inputx = (torch.LongTensor(x_ext).to(device),
                          word_emb,
                          dec_state,
                          torch.FloatTensor(x_mask).to(device),
                          y, len_y, oy, batch.max_ext_len, oovs)
            else:
                inputx = (torch.LongTensor(x).to(device),
                          word_emb,
                          dec_state,
                          torch.FloatTensor(x_mask).to(device),
                          y, len_y, oy)
            greedy_decode(test_idx, inputx, model, modules, consts, options)
            si += len(test_idx)

        testing_batch_size = len(test_idx)
        partial_num += testing_batch_size
        total_num += testing_batch_size
        if partial_num >= consts["testing_print_size"]:
            print(total_num, "summs are generated")
            partial_num = 0

    # Context manager so the dump is flushed and the handle closed.
    with open(opt.output_dir + '/test_batch_order.pkl', 'wb') as order_file:
        pickle.dump(batches_sorted_idx, order_file)
    print(si, total_num)
def run(existing_model_name=None, is_predicting=0):
    """Train with per-epoch validation, or run prediction.

    The mode is taken from ``options["is_predicting"]`` as returned by
    ``init_modules(is_predicting)``. Training runs all
    ``consts["max_epoch"]`` epochs; after each one the validation loss is
    computed under ``torch.no_grad()`` and a checkpoint is saved whenever it
    is lower than the best seen so far.

    Args:
        existing_model_name: checkpoint file name below ``cfg.cc.MODEL_PATH``
            used for prediction; ``None`` selects
            ``"cnndm.s2s.transformer.gpu0.epoch27.2"``.
        is_predicting: forwarded unchanged to ``init_modules``.
    """
    modules, consts, options = init_modules(is_predicting)

    if options["is_predicting"]:
        need_load_model = True
        training_model = False
        predict_model = True
    else:
        need_load_model = False
        training_model = True
        predict_model = False

    print_basic_info(modules, consts, options)

    if training_model:
        print("loading train set...")
        # Context managers so the pickle file handles are not leaked.
        with open(cfg.cc.TRAINING_DATA_PATH + "train.pkl", "rb") as data_file:
            train_xy_list = pickle.load(data_file)
        with open(cfg.cc.VALIDATE_DATA_PATH + "valid.pkl", "rb") as data_file:
            val_xy_list = pickle.load(data_file)
        train_batch_list, train_size, n_train_batches = datar.batched(
            len(train_xy_list), options, consts)
        val_batch_list, val_size, n_val_batches = datar.batched(
            len(val_xy_list), options, consts)
        print("train size =", train_size, ", num training batches =", n_train_batches)
        print("val size =", val_size, ", num validation batches =", n_val_batches)

    running_start = time.time()

    print("compiling model ...")
    model = Model(modules, consts, options)
    if options["cuda"]:
        model.cuda()
    optimizer = torch.optim.Adagrad(model.parameters(), lr=consts["lr"],
                                    initial_accumulator_value=0.1)

    model_name = "".join(["s2s.", options["cell"]])
    existing_epoch = 0
    if need_load_model:
        if existing_model_name is None:
            existing_model_name = "cnndm.s2s.transformer.gpu0.epoch27.2"
        print("loading existed model:", existing_model_name)
        model, optimizer = load_model(cfg.cc.MODEL_PATH + existing_model_name,
                                      model, optimizer)

    if training_model:
        print("start training model ")
        device = options["device"]

        def forward(batch):
            # One forward pass; returns whatever the model returns.
            return model(
                torch.LongTensor(batch.x).cuda(device),
                torch.LongTensor(batch.y_inp).cuda(device),
                torch.LongTensor(batch.y).cuda(device),
                torch.FloatTensor(batch.x_mask).cuda(device),
                torch.FloatTensor(batch.y_mask).cuda(device),
                torch.LongTensor(batch.x_ext).cuda(device),
                torch.LongTensor(batch.y_ext).cuda(device),
                batch.max_ext_len)

        best_val_loss = 999999999.0
        print("max epoch:", consts["max_epoch"])
        for epoch in range(0, consts["max_epoch"]):
            print("epoch %s:" % (epoch + existing_epoch))
            train_loss = 0.0
            epoch_start = time.time()
            # shuffle the trainset
            train_batch_list, train_size, n_train_batches = datar.batched(
                len(train_xy_list), options, consts)
            n_used_train_batch = 0
            model.train()
            for idx_batch in range(n_train_batches):
                train_idx = train_batch_list[idx_batch]
                train_batch_raw = [train_xy_list[xy_idx] for xy_idx in train_idx]
                # Only full-size batches are trained on.
                if len(train_batch_raw) != consts["batch_size"]:
                    continue
                train_batch = datar.get_data(train_batch_raw, modules, consts, options)

                model.zero_grad()
                _, cost = forward(train_batch)
                cost.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), consts["norm_clip"])
                optimizer.step()

                train_loss += cost.item()
                n_used_train_batch += 1
                if n_used_train_batch % 3000 == 0:
                    print("\tprocessed %s batches..." % n_used_train_batch)
                # (Interval-based progress output and checkpointing were
                # disabled in the original.)

            elapsed_time = time.time() - epoch_start

            model.eval()
            n_used_val_batch = 0
            val_loss = 0.0
            with torch.no_grad():
                for idx_batch in range(n_val_batches):
                    val_idx = val_batch_list[idx_batch]
                    val_batch_raw = [val_xy_list[xy_idx] for xy_idx in val_idx]
                    # Partial batches are skipped here as well.
                    if len(val_batch_raw) != consts["batch_size"]:
                        continue
                    val_batch = datar.get_data(val_batch_raw, modules, consts, options)
                    _, cost = forward(val_batch)
                    n_used_val_batch += 1
                    val_loss += cost.item()
            val_loss /= float(n_used_val_batch)

            print("in this epoch, training loss =", train_loss / n_used_train_batch,
                  ", validation loss =", val_loss,
                  ", time:", elapsed_time)

            if val_loss < best_val_loss:
                best_val_loss = val_loss
                filename = cfg.cc.MODEL_PATH + model_name + "_checkpoint_epoch%s.pkl" % (epoch + 1)
                print("Exceed! save the model to %s..." % filename)
                save_model(filename, model, optimizer)
                print("finished")
    else:
        print("skip training model")

    if predict_model:
        predict(model, modules, consts, options)
    print("Finished, time:", time.time() - running_start)
def predict(model, modules, consts, options):
    """Decode the test set with ``model`` and run the external scoring scripts.

    Loads ``test.pkl`` from ``cfg.cc.TESTING_DATA_PATH``, encodes it batch by
    batch and, when ``options["beam_decoding"]`` is set, calls ``beam_decode``
    once per sample.  Afterwards ``distinct_topk.py`` and ``multi-bleu.perl``
    are invoked through the shell on the prediction / gold response files.

    Args:
        model: network exposing ``eval()`` and ``encode()``.
        modules: project module table, forwarded unchanged to the helpers.
        consts: constants dict; ``consts["testing_print_size"]`` sets how many
            samples are processed between progress messages.
        options: options dict; reads ``"beam_decoding"``, ``"copy"`` and
            ``"device"`` and overwrites ``"has_y"``.

    Side effects:
        Rebuilds the four summary / ground-truth directories, deletes stale
        result files under ``cfg.cc.RESULT_PATH`` and leaves ``model`` in
        evaluation mode.
    """
    print("start predicting,")
    model.eval()
    options["has_y"] = TESTING_DATASET_CLS.HAS_Y
    if options["beam_decoding"]:
        print("using beam search")
    else:
        print("using greedy search")

    for out_dir in (cfg.cc.BEAM_SUMM_PATH, cfg.cc.BEAM_GT_PATH,
                    cfg.cc.GROUND_TRUTH_PATH, cfg.cc.SUMM_PATH):
        rebuild_dir(out_dir)

    print("loading test set...")
    # NOTE: pickle can execute arbitrary code -- only load trusted data files.
    # The context manager closes the handle the old bare open() call leaked.
    with open(cfg.cc.TESTING_DATA_PATH + "test.pkl", "rb") as data_file:
        xy_list = pickle.load(data_file)
    batch_list, num_files, num_batches = datar.batched(len(xy_list), options, consts)

    print("num_files = ", num_files, ", num_batches = ", num_batches)

    # Remove results of a previous run.
    # NOTE(review): these files are presumably appended to by beam_decode --
    # confirm against its implementation.
    of = cfg.cc.RESULT_PATH + "out.txt"
    pred_resp_file = cfg.cc.RESULT_PATH + 'pred_resp.txt'
    gold_resp_file = cfg.cc.RESULT_PATH + 'gold_resp.txt'
    for stale_path in (of, pred_resp_file, gold_resp_file):
        if os.path.exists(stale_path):
            os.remove(stale_path)

    device = options["device"]
    partial_num = 0  # samples processed since the last progress message
    total_num = 0    # samples processed overall
    si = 0           # running sample index handed to beam_decode

    # Inference only: do not record an autograd graph while decoding.
    with torch.no_grad():
        for idx_batch in range(num_batches):
            test_idx = batch_list[idx_batch]
            batch_raw = [xy_list[xy_idx] for xy_idx in test_idx]
            batch = datar.get_data(batch_raw, modules, consts, options)

            assert len(test_idx) == batch.x.shape[1]  # local_batch_size

            word_emb, padding_mask = model.encode(
                torch.LongTensor(batch.x).cuda(device))

            if options["beam_decoding"]:
                for idx_s in range(len(test_idx)):
                    if options["copy"]:
                        inputx = (
                            torch.LongTensor(batch.x_ext[:, idx_s]).cuda(device),
                            torch.FloatTensor(batch.x_mask[:, idx_s, :]).cuda(device),
                            word_emb[:, idx_s, :],
                            padding_mask[:, idx_s],
                            batch.y[:, idx_s],
                            [batch.len_y[idx_s]],
                            batch.original_summarys[idx_s],
                            batch.max_ext_len,
                            batch.x_ext_words[idx_s],
                            batch.original_contents[idx_s],
                        )
                    else:
                        inputx = (
                            torch.LongTensor(batch.x[:, idx_s]).cuda(device),
                            word_emb[:, idx_s, :],
                            padding_mask[:, idx_s],
                            batch.y[:, idx_s],
                            [batch.len_y[idx_s]],
                            batch.original_summarys[idx_s],
                            batch.original_contents[idx_s],
                        )
                    beam_decode(si, inputx, model, modules, consts, options)
                    si += 1
            else:
                # Greedy decoding is not implemented in this variant: the
                # batch is only counted, nothing is generated.
                pass

            testing_batch_size = len(test_idx)
            partial_num += testing_batch_size
            total_num += testing_batch_size
            if partial_num >= consts["testing_print_size"]:
                print(total_num, "summs are generated")
                partial_num = 0

    pred_dist_file = cfg.cc.RESULT_PATH + 'pred_resp_dist.txt'
    print("Calculating distinct metrics...")
    # The exit status of both scoring commands is discarded.
    os.system('python distinct_topk.py 1927 < %s > %s' % (pred_resp_file, pred_dist_file))
    os.system('perl multi-bleu.perl %s < %s' % (gold_resp_file, pred_resp_file))
    print(si, total_num)
def run(existing_model_name=None):
    """Train the seq2seq model, or load a checkpoint and run prediction.

    The mode comes from ``options["is_predicting"]`` as returned by
    ``init_modules()``: when true, a saved model is loaded and ``predict`` is
    called; otherwise a fresh model is trained on ``train.pkl`` (or on
    ``valid.pkl`` when ``options["is_debugging"]`` is set).

    Training stops early as soon as an epoch's summed cost fails to drop below
    the previous epoch's (never in debugging mode).  Checkpoints are written
    at every progress report, after every improving epoch and once more at the
    end; all but the final one are skipped in debugging mode.

    Args:
        existing_model_name: checkpoint file name under ``cfg.cc.MODEL_PATH``
            to load in prediction mode.  ``None`` selects
            ``"db.s2s.gru.gpu3.epoch15.5"``.
    """
    modules, consts, options = init_modules()

    #use_gpu(consts["idx_gpu"])
    predict_model = bool(options["is_predicting"])
    need_load_model = predict_model
    training_model = not predict_model

    print_basic_info(modules, consts, options)

    if training_model:
        print("loading train set...")
        if options["is_debugging"]:
            data_path = cfg.cc.VALIDATE_DATA_PATH + "valid.pkl"
        else:
            data_path = cfg.cc.TRAINING_DATA_PATH + "train.pkl"
        # NOTE: pickle can execute arbitrary code -- only load trusted data files.
        # The context manager closes the handle the old bare open() call leaked.
        with open(data_path, "rb") as data_file:
            xy_list = pickle.load(data_file)
        batch_list, num_files, num_batches = datar.batched(
            len(xy_list), options, consts)
        print("num_files = ", num_files, ", num_batches = ", num_batches)

    running_start = time.time()

    print("compiling model ...")
    model = Model(modules, consts, options)
    if options["cuda"]:
        model.cuda()
    optimizer = torch.optim.Adagrad(model.parameters(),
                                    lr=consts["lr"],
                                    initial_accumulator_value=0.1)

    model_name = "".join(["s2s.", options["cell"]])
    existing_epoch = 0

    def checkpoint_path(gpu_tag, epoch, num_partial):
        # Single place that builds checkpoint file names; gpu_tag is ".gpu"
        # for regular checkpoints and ".final.gpu" for the final one.
        return (cfg.cc.MODEL_PATH + model_name + gpu_tag
                + str(consts["idx_gpu"]) + ".epoch"
                + str(epoch // consts["save_epoch"] + existing_epoch)
                + "." + str(num_partial))

    if need_load_model:
        if existing_model_name is None:
            existing_model_name = "db.s2s.gru.gpu3.epoch15.5"
        print("loading existed model:", existing_model_name)
        model, optimizer = load_model(
            cfg.cc.MODEL_PATH + existing_model_name, model, optimizer)

    if training_model:
        print("start training model ")
        device = options["device"]
        # Number of processed samples between two progress reports.
        if num_files >= consts["print_time"]:
            print_size = num_files // consts["print_time"]
        else:
            print_size = num_files

        last_total_error = float("inf")
        print("max epoch:", consts["max_epoch"])
        for epoch in range(0, consts["max_epoch"]):
            print("epoch: ", epoch + existing_epoch)
            num_partial = 1
            total_error = 0.0
            error_c = 0.0
            partial_num_files = 0
            epoch_start = time.time()
            partial_start = time.time()
            # shuffle the trainset
            batch_list, num_files, num_batches = datar.batched(
                len(xy_list), options, consts)
            used_batch = 0.
            for idx_batch in range(num_batches):
                train_idx = batch_list[idx_batch]
                batch_raw = [xy_list[xy_idx] for xy_idx in train_idx]
                # Only full-size batches are trained on.
                if len(batch_raw) != consts["batch_size"]:
                    continue
                local_batch_size = len(batch_raw)
                batch = datar.get_data(batch_raw, modules, consts, options)

                (x, len_x, x_mask, y, len_y, y_mask,
                 ox, oy, x_ext, y_ext, oovs) = sort_samples(
                    batch.x, batch.len_x, batch.x_mask,
                    batch.y, batch.len_y, batch.y_mask,
                    batch.original_contents, batch.original_summarys,
                    batch.x_ext, batch.y_ext, batch.x_ext_words)

                model.zero_grad()
                y_pred, cost, cost_c = model(
                    torch.LongTensor(x).to(device),
                    torch.LongTensor(len_x).to(device),
                    torch.LongTensor(y).to(device),
                    torch.FloatTensor(x_mask).to(device),
                    torch.FloatTensor(y_mask).to(device),
                    torch.LongTensor(x_ext).to(device),
                    torch.LongTensor(y_ext).to(device),
                    batch.max_ext_len)
                # The model may return no auxiliary cost; then only the main
                # cost is optimised and error_c is left untouched.
                if cost_c is None:
                    loss = cost
                else:
                    loss = cost + cost_c
                    error_c += cost_c.item()

                loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(),
                                               consts["norm_clip"])
                optimizer.step()

                total_error += cost.item()
                used_batch += 1
                partial_num_files += consts["batch_size"]
                # idx_batch comes from range(num_batches), so the former
                # "idx_batch < num_batches" guard was always true.
                if (partial_num_files // print_size) == 1:
                    print(idx_batch + 1, "/", num_batches, "batches have been processed,",
                          "average cost until now:", "cost =", total_error / used_batch, ",",
                          "cost_c =", error_c / used_batch, ",",
                          "time:", time.time() - partial_start)
                    partial_num_files = 0
                    if not options["is_debugging"]:
                        print("save model... ", )
                        save_model(checkpoint_path(".gpu", epoch, num_partial),
                                   model, optimizer)
                        print("finished")
                    num_partial += 1

            print("in this epoch, total average cost =", total_error / used_batch, ",",
                  "cost_c =", error_c / used_batch, ",",
                  "time:", time.time() - epoch_start)

            # Print decoded sentences for the last batch trained in this epoch.
            print_sent_dec(y_pred, y_ext, y_mask, oovs, modules, consts,
                           options, local_batch_size)

            if last_total_error > total_error or options["is_debugging"]:
                last_total_error = total_error
                if not options["is_debugging"]:
                    print("save model... ", )
                    save_model(checkpoint_path(".gpu", epoch, num_partial),
                               model, optimizer)
                    print("finished")
            else:
                # Summed cost did not improve on the previous epoch: stop.
                print("optimization finished")
                break

        print("save final model... ", )
        save_model(checkpoint_path(".final.gpu", epoch, num_partial),
                   model, optimizer)
        print("finished")
    else:
        print("skip training model")

    if predict_model:
        predict(model, modules, consts, options)
    print("Finished, time:", time.time() - running_start)
def predict(model, modules, consts, options):
    """Generate summaries for the evaluation set with beam or greedy search.

    Loads ``pj3000.pkl`` from ``cfg.cc.VALIDATE_DATA_PATH`` when
    ``options["model_selection"]`` is set, otherwise from
    ``cfg.cc.TESTING_DATA_PATH``.  Each batch is sorted with ``sort_samples``,
    encoded, and then decoded either sample by sample (``beam_decode``) or as
    a whole batch (``greedy_decode``).

    Args:
        model: network exposing ``eval()`` and ``encode()``.
        modules: project module table, forwarded unchanged to the helpers.
        consts: constants dict; ``consts["testing_print_size"]`` sets how many
            samples are processed between progress messages.
        options: options dict; reads ``"beam_decoding"``, ``"copy"``,
            ``"model_selection"`` and ``"device"`` and overwrites ``"has_y"``.

    Side effects:
        Rebuilds the four summary / ground-truth directories, deletes a stale
        ``out.txt`` under ``cfg.cc.RESULT_PATH`` and leaves ``model`` in
        evaluation mode.
    """
    print("start predicting,")
    # Put any dropout / batch-norm layers into inference behaviour; this call
    # was previously missing here.
    model.eval()
    options["has_y"] = TESTING_DATASET_CLS.HAS_Y
    if options["beam_decoding"]:
        print("using beam search")
    else:
        print("using greedy search")

    for out_dir in (cfg.cc.BEAM_SUMM_PATH, cfg.cc.BEAM_GT_PATH,
                    cfg.cc.GROUND_TRUTH_PATH, cfg.cc.SUMM_PATH):
        rebuild_dir(out_dir)

    of = cfg.cc.RESULT_PATH + "out.txt"
    if os.path.exists(of):
        os.remove(of)

    print("loading test set...")
    if options["model_selection"]:
        data_path = cfg.cc.VALIDATE_DATA_PATH + "pj3000.pkl"
    else:
        data_path = cfg.cc.TESTING_DATA_PATH + "pj3000.pkl"
    # NOTE: pickle can execute arbitrary code -- only load trusted data files.
    # The context manager closes the handle the old bare open() call leaked.
    with open(data_path, "rb") as data_file:
        xy_list = pickle.load(data_file)
    batch_list, num_files, num_batches = datar.batched(len(xy_list), options, consts)

    print("num_files = ", num_files, ", num_batches = ", num_batches)

    device = options["device"]
    partial_num = 0  # samples processed since the last progress message
    total_num = 0    # samples processed overall
    si = 0           # running sample index / count of decoded samples

    # Inference only: do not record an autograd graph while decoding.
    with torch.no_grad():
        for idx_batch in range(num_batches):
            test_idx = batch_list[idx_batch]
            batch_raw = [xy_list[xy_idx] for xy_idx in test_idx]
            batch = datar.get_data(batch_raw, modules, consts, options)

            assert len(test_idx) == batch.x.shape[1]  # local_batch_size

            (x, len_x, x_mask, y, len_y, y_mask,
             ox, oy, x_ext, y_ext, oovs) = sort_samples(
                batch.x, batch.len_x, batch.x_mask,
                batch.y, batch.len_y, batch.y_mask,
                batch.original_contents, batch.original_summarys,
                batch.x_ext, batch.y_ext, batch.x_ext_words)

            word_emb, dec_state = model.encode(
                torch.LongTensor(x).to(device),
                torch.LongTensor(len_x).to(device),
                torch.FloatTensor(x_mask).to(device))

            if options["beam_decoding"]:
                # Beam search handles one sample (one column) at a time.
                for idx_s in range(len(test_idx)):
                    if options["copy"]:
                        inputx = (
                            torch.LongTensor(x_ext[:, idx_s]).to(device),
                            word_emb[:, idx_s, :],
                            dec_state[idx_s, :],
                            torch.FloatTensor(x_mask[:, idx_s, :]).to(device),
                            y[:, idx_s],
                            [len_y[idx_s]],
                            ox[idx_s],
                            oy[idx_s],
                            batch.max_ext_len,
                            oovs[idx_s],
                        )
                    else:
                        inputx = (
                            torch.LongTensor(x[:, idx_s]).to(device),
                            word_emb[:, idx_s, :],
                            dec_state[idx_s, :],
                            torch.FloatTensor(x_mask[:, idx_s, :]).to(device),
                            y[:, idx_s],
                            [len_y[idx_s]],
                            ox[idx_s],
                            oy[idx_s],
                        )
                    beam_decode(si, inputx, model, modules, consts, options)
                    si += 1
            else:
                # Greedy search decodes the whole batch in one call.
                if options["copy"]:
                    inputx = (
                        torch.LongTensor(x_ext).to(device),
                        word_emb,
                        dec_state,
                        torch.FloatTensor(x_mask).to(device),
                        y,
                        len_y,
                        oy,
                        batch.max_ext_len,
                        oovs,
                    )
                else:
                    inputx = (
                        torch.LongTensor(x).to(device),
                        word_emb,
                        dec_state,
                        torch.FloatTensor(x_mask).to(device),
                        y,
                        len_y,
                        oy,
                    )
                greedy_decode(test_idx, inputx, model, modules, consts, options)
                si += len(test_idx)

            testing_batch_size = len(test_idx)
            partial_num += testing_batch_size
            total_num += testing_batch_size
            if partial_num >= consts["testing_print_size"]:
                print(total_num, "summs are generated")
                partial_num = 0

    print(si, total_num)
lr = 0.001 drop_rate = 0. batch_size = 128 hidden_size = 500 latent_size = 2 # try: sgd, momentum, rmsprop, adagrad, adadelta, adam, nesterov_momentum optimizer = "adam" continuous = False if continuous: pass else: train_set, valid_set, test_set = data.mnist() train_xy = data.batched(train_set, batch_size) dim_x = train_xy[0][0].shape[1] dim_y = train_xy[0][1].shape[1] print "#features = ", dim_x, "#labels = ", dim_y print "compiling..." model = VAE(dim_x, dim_x, hidden_size, latent_size, optimizer) print "training..." start = time.time() for i in xrange(50): error = 0.0 in_start = time.time() for batch_id, xy in train_xy.items(): X = xy[0] cost = model.train(X, lr)