# Training driver for the RVAE (recurrent variational autoencoder), variant 1.
# NOTE(review): this fragment is whitespace-mangled and cut off mid-loop — the
# statements after the last print are not visible here. Indentation below is
# reconstructed from the flattened source; confirm nesting against the original.
batch_loader = BatchLoader('')  # '' = data directory prefix (root of project)
# Model hyper-parameters are derived from the corpus statistics the loader computed.
parameters = Parameters(batch_loader.max_word_len,
                        batch_loader.max_seq_len,
                        batch_loader.words_vocab_size,
                        batch_loader.chars_vocab_size)
rvae = RVAE(parameters)
if args.use_trained:
    # Resume from a previously saved checkpoint file named 'trained_RVAE'.
    rvae.load_state_dict(t.load('trained_RVAE'))
if args.use_cuda:
    rvae = rvae.cuda()
optimizer = Adam(rvae.learnable_parameters(), args.learning_rate)
# trainer()/validater() return closures that run one optimization / validation step.
train_step = rvae.trainer(optimizer, batch_loader)
validate = rvae.validater(batch_loader)
ce_result = []   # accumulated cross-entropy values (per logged iteration)
kld_result = []  # accumulated KL-divergence values (per logged iteration)
for iteration in range(args.num_iterations):
    # coef is presumably the KL-annealing coefficient — TODO confirm in RVAE.trainer.
    cross_entropy, kld, coef = train_step(iteration, args.batch_size, args.use_cuda, args.dropout)
    if iteration % 5 == 0:
        # Periodic progress report every 5 iterations.
        print('\n')
        print('------------TRAIN-------------')
        print('----------ITERATION-----------')
        print(iteration)
        print('--------CROSS-ENTROPY---------')
# Training driver for the RVAE, variant 2: iterates batches manually with a
# running index `x` and a console progress bar instead of a fixed step closure.
# NOTE(review): fragment is cut off inside the `while True:` loop — the code that
# consumes `input` (and advances `x` / terminates the loop) is not visible here.
# Indentation below is reconstructed from the flattened source; confirm nesting.
batch_loader = BatchLoader('')  # '' = data directory prefix
parameters = Parameters(batch_loader.max_word_len,
                        batch_loader.max_seq_len,
                        batch_loader.words_vocab_size,
                        batch_loader.chars_vocab_size)
rvae = RVAE(parameters)
if args.use_trained:
    # Resume from a previously saved checkpoint file named 'trained_RVAE'.
    rvae.load_state_dict(t.load('trained_RVAE'))
if args.use_cuda:
    rvae = rvae.cuda()
optimizer = Adam(rvae.learnable_parameters(), args.learning_rate)
# Unlike variant 1, the trainer closure here takes no batch loader — batches are
# fetched explicitly inside the loop below.
train_step = rvae.trainer(optimizer)
# validate = rvae.validater()
ce_result = []   # accumulated cross-entropy values
kld_result = []  # accumulated KL-divergence values
# training_data = batch_loader.training_data('train')
# validation_data = batch_loader.training_data('valid')
for iteration in range(args.num_iterations):
    print(f"-----Iteration: {iteration}-------------")
    x = 0  # batch start offset into the training split
    # maxval=130001 looks like a hard-coded training-set size — TODO confirm.
    bar = progressbar.ProgressBar(maxval=130001,
                                  widgets=[progressbar.Bar('=', '[', ']'), ' ',
                                           progressbar.Percentage()])
    bar.start()
    while True:
        # NOTE(review): `input` shadows the builtin; kept as-is because the
        # unseen continuation of this loop references it.
        input = batch_loader.next_batch(x, args.batch_size, "train")
# Training driver for the RVAE, variant 3: builds its own preprocessing pipeline
# (Preprocess -> Batch) from a raw text file instead of a BatchLoader.
# NOTE(review): fragment is cut off mid-loop — statements after the train_step
# call are not visible. Indentation reconstructed from the flattened source.
with open('train.txt', 'r') as f:
    data = f.readlines()
# embedding_model is defined outside this fragment — presumably a pretrained
# word-embedding model; verify against the surrounding file.
preprocess = Preprocess(embedding_model)
# NOTE(review): `input` shadows the builtin — rename when the full file is in view.
input = preprocess.to_sequence(data)
# embedding=preprocess.embedding()
# np.save('embedding',embedding)
batch_loader = Batch(input, 0.7)  # 0.7 is presumably the train/test split ratio — confirm
params = Parameter(word_embed_size=300, encode_rnn_size=600, latent_variable_size=1400,
                   decode_rnn_size=600, vocab_size=preprocess.vocab_size,
                   embedding_path='embedding.npy')
model = RVAE(params)
model = model.cuda()  # unconditional .cuda(): this variant requires a GPU
optimizer = Adam(model.learnable_parameters(), 1e-3)  # fixed learning rate, no CLI args here
train_step = model.trainer(optimizer)
use_cuda = t.cuda.is_available()
ce_list = []    # cross-entropy history
kld_list = []   # KL-divergence history
coef_list = []  # annealing-coefficient history (presumably) — confirm
test_batch = batch_loader.test_next_batch(1)  # generator of size-1 test batches
for i, batch in enumerate(batch_loader.train_next_batch(1)):
    # Disabled periodic sampling of a sentence from the model:
    # if i%20==0:
    #     sample=next(test_batch)
    #     sentence=model.sample(10,sample,use_cuda)
    #     sentence=[preprocess.index_to_word(i) for i in sentence]
    #     print(' '.join(sentence))
    #     break
    # 0.2 here is the dropout rate passed to the training step — TODO confirm.
    ce, kld, coef = train_step(batch, 0.2, use_cuda)
# Training driver for a paraphrase-generation RVAE, variant 4: two batch loaders,
# one for source sentences and one for their paraphrases.
# NOTE(review): fragment starts mid-call — the head of this Parameters(...)
# invocation (and the name it is assigned to, presumably `parameters_2`) is
# outside this view — and it also ends mid-call. Indentation reconstructed.
    batch_loader_2.max_seq_len,
    batch_loader_2.words_vocab_size,
    batch_loader_2.chars_vocab_size)
'''================================================================================================= '''
# This RVAE takes two parameter sets: one per batch loader / sentence side.
rvae = RVAE(parameters, parameters_2)
if args.use_trained:
    # Resume from a previously saved checkpoint file named 'trained_RVAE'.
    rvae.load_state_dict(t.load('trained_RVAE'))
if args.use_cuda:
    rvae = rvae.cuda()
optimizer = Adam(rvae.learnable_parameters(), args.learning_rate)
# batch_loader holds the original sentences; batch_loader_2 holds the paraphrases.
train_step = rvae.trainer(optimizer, batch_loader, batch_loader_2)
validate = rvae.validater(batch_loader, batch_loader_2)
ce_result = []   # cross-entropy history
kld_result = []  # KL-divergence history
start_index = 0  # rolling offset into the training data
# start_index_2 = 0
for iteration in range(args.num_iterations):  # This needs to be changed
    ## This must stay no larger than (training-set size - batch size),
    ## otherwise indexing goes out of bounds. 49999 is presumably the
    ## training-set size — confirm.
    start_index = (start_index + 1) % (49999 - args.batch_size)
    # start_index = (start_index+args.batch_size)%149163
    # Compute the cross-entropy loss, KL divergence, annealing coefficient, etc.
    cross_entropy, kld, coef = train_step(iteration, args.batch_size, args.use_cuda, args.dropout,