def _test_full_beam_decoder(self, test_args):
    import io

    import onnx

    samples, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
    sample = next(samples)
    src_tokens = sample['net_input']['src_tokens'][0:1].t()
    src_lengths = sample['net_input']['src_lengths'][0:1].int()

    num_models = 3
    model_list = []
    for _ in range(num_models):
        model_list.append(models.build_model(test_args, src_dict, tgt_dict))

    beam_size = 6
    bs = BeamSearch(model_list, src_tokens, src_lengths, beam_size=beam_size)

    prev_token = torch.LongTensor([0])
    prev_scores = torch.FloatTensor([0.0])
    # Placeholder attention weights, one per expected source position.
    attn_weights = torch.zeros(11)
    prev_hypos_indices = torch.zeros(beam_size, dtype=torch.int64)

    outs = bs(
        src_tokens,
        src_lengths,
        prev_token,
        prev_scores,
        attn_weights,
        prev_hypos_indices,
        torch.LongTensor([20]),
    )

    # Export the beam search module to ONNX, writing the protobuf into an
    # in-memory buffer.
    f = io.BytesIO()
    torch.onnx._export(
        bs,
        (
            src_tokens,
            src_lengths,
            prev_token,
            prev_scores,
            attn_weights,
            prev_hypos_indices,
            torch.LongTensor([20]),
        ),
        f,
        export_params=True,
        verbose=False,
        example_outputs=outs,
    )
    # Also exercise the pretty-string export path (return value unused).
    torch.onnx._export_to_pretty_string(
        bs,
        (
            src_tokens,
            src_lengths,
            prev_token,
            prev_scores,
            attn_weights,
            prev_hypos_indices,
            torch.LongTensor([20]),
        ),
        f,
        export_params=True,
        verbose=False,
        example_outputs=outs,
    )

    # Load the exported model back and run it through the Caffe2 backend.
    f.seek(0)
    onnx_model = onnx.load(f)
    c2_model = caffe2_backend.prepare(onnx_model)
    c2_model.run(
        (
            src_tokens.numpy(),
            src_lengths.numpy(),
            prev_token.numpy(),
            prev_scores.numpy(),
            attn_weights.numpy(),
            prev_hypos_indices.numpy(),
            np.array([20]),
        )
    )

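# A minimal sketch of validating the exported protobuf before handing it to a
# backend, using only the standard `onnx.checker` and `onnx.helper` utilities.
# `check_exported_model` is a hypothetical helper, not part of the test above.
def check_exported_model(model_bytes):
    import io

    import onnx

    # Parse the serialized protobuf and run ONNX's structural validator.
    onnx_model = onnx.load(io.BytesIO(model_bytes))
    onnx.checker.check_model(onnx_model)
    # Print a human-readable summary of the exported graph.
    print(onnx.helper.printable_graph(onnx_model.graph))
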
def main():
    parser = get_parser_with_args()
    args = parser.parse_args()

    if args.output_file == "":
        print("No action taken. Need output_file to be specified.")
        parser.print_help()
        return

    checkpoint_filenames = args.path.split(CHECKPOINT_PATHS_DELIMITER)

    beam_search = BeamSearch.build_from_checkpoints(
        checkpoint_filenames=checkpoint_filenames,
        src_dict_filename=args.src_dict,
        dst_dict_filename=args.dst_dict,
        beam_size=args.beam_size,
        word_reward=args.word_reward,
        unk_reward=args.unk_reward,
    )
    beam_search.save_to_pytorch(output_path=args.output_file)

    if args.output_graph_file:
        # args.output_graph_file is already a path string, so open it directly.
        with open(args.output_graph_file, "w") as f:
            f.write(str(beam_search.graph))

def test_basic_generate(self):
    """
    A basic test that the output given by the SequenceGenerator class is the
    same as the output of BeamSearch/BeamDecode.
    """
    # Setup parameters required for SequenceGenerator and BeamSearch/BeamDecode
    TEST_ARGS = test_utils.ModelParamsDict(arch="rnn")
    TEST_ARGS.sequence_lstm = True
    BEAM_SIZE = 1
    WORD_REWARD = 0
    UNK_REWARD = 0
    LENGTH_PENALTY = 0
    PLACEHOLDER_SEQ_LENGTH = 5
    NBEST = 2
    MAX_SEQ_LEN = 7

    src_tokens = torch.LongTensor([[0, 0, 0]])
    src_lengths = torch.LongTensor([3])

    # Generate values using SequenceGenerator
    _, src_dict, tgt_dict = test_utils.prepare_inputs(TEST_ARGS)
    task = tasks.DictionaryHolderTask(src_dict, tgt_dict)
    model = task.build_model(TEST_ARGS)
    translator = SequenceGenerator(
        [model],
        task.target_dictionary,
        beam_size=BEAM_SIZE,
        word_reward=WORD_REWARD,
        unk_reward=UNK_REWARD,
    )
    encoder_input = {"src_tokens": src_tokens, "src_lengths": src_lengths}
    top_seq_gen_hypothesis = translator.generate(encoder_input, maxlen=MAX_SEQ_LEN)[0]

    # Generate output using BeamSearch/BeamDecode
    placeholder_src_tokens = torch.LongTensor(
        np.ones((PLACEHOLDER_SEQ_LENGTH, 1), dtype="int64")
    )
    placeholder_src_lengths = torch.IntTensor(
        np.array([PLACEHOLDER_SEQ_LENGTH], dtype="int32")
    )
    beam_search = BeamSearch(
        [model],
        tgt_dict,
        placeholder_src_tokens,
        placeholder_src_lengths,
        beam_size=BEAM_SIZE,
        word_reward=WORD_REWARD,
        unk_reward=UNK_REWARD,
        quantize=False,
    )
    beam_decode = BeamDecode(
        eos_token_id=tgt_dict.eos(),
        length_penalty=LENGTH_PENALTY,
        nbest=NBEST,
        beam_size=BEAM_SIZE,
        stop_at_eos=True,
    )

    # A few more placeholder inputs for BeamSearch
    prev_token = torch.LongTensor([tgt_dict.eos()])
    prev_scores = torch.FloatTensor([0.0])
    attn_weights = torch.zeros(src_lengths[0].item())
    prev_hypos_indices = torch.zeros(1, dtype=torch.int64)
    num_steps = torch.LongTensor([MAX_SEQ_LEN])

    all_tokens, all_scores, all_weights, all_prev_indices = beam_search(
        src_tokens.transpose(0, 1),
        src_lengths,
        prev_token,
        prev_scores,
        attn_weights,
        prev_hypos_indices,
        num_steps,
    )
    beam_decode_output = beam_decode(
        all_tokens, all_scores, all_weights, all_prev_indices, num_steps[0]
    )

    for hyp_index in range(
        min(len(beam_decode_output), len(top_seq_gen_hypothesis))
    ):
        top_beam_decode_hypothesis = beam_decode_output[hyp_index]

        # Compare the two outputs. We always look only at positions 0 to
        # MAX_SEQ_LEN, because the sequence generator adds an EOS at the end
        # after MAX_SEQ_LEN.

        ## Compare the two hypotheses
        np.testing.assert_array_equal(
            top_seq_gen_hypothesis[hyp_index]["tokens"].tolist()[0:MAX_SEQ_LEN],
            top_beam_decode_hypothesis[0].tolist()[0:MAX_SEQ_LEN],
        )
        ## Compare token-level scores
        np.testing.assert_array_almost_equal(
            top_seq_gen_hypothesis[hyp_index]["positional_scores"].tolist()[
                0:MAX_SEQ_LEN
            ],
            top_beam_decode_hypothesis[2][0:MAX_SEQ_LEN],
            decimal=1,
        )
        ## Compare attention weights
        np.testing.assert_array_almost_equal(
            top_seq_gen_hypothesis[hyp_index]["attention"].numpy()[
                :, 0:MAX_SEQ_LEN
            ],
            torch.stack(top_beam_decode_hypothesis[3])
            .transpose(0, 1)
            .numpy()[:, 0:MAX_SEQ_LEN],
            decimal=1,
        )

def main():
    parser = argparse.ArgumentParser(
        description='Export PyTorch-trained FBTranslate models to caffe2',
    )
    parser.add_argument(
        '--checkpoint',
        action='append',
        nargs='+',
        help='PyTorch checkpoint file (at least one required)',
    )
    parser.add_argument(
        '--output_file',
        default='',
        help='File name to which to save beam search network',
    )
    parser.add_argument(
        '--src_dict',
        required=True,
        help='File encoding PyTorch dictionary for source language',
    )
    parser.add_argument(
        '--dst_dict',
        required=True,
        help='File encoding PyTorch dictionary for target language',
    )
    parser.add_argument(
        '--beam_size',
        type=int,
        default=6,
        help='Number of top candidates returned by each decoder step',
    )
    parser.add_argument(
        '--word_penalty',
        type=float,
        default=0.0,
        help='Value to add for each word (besides EOS)',
    )
    parser.add_argument(
        '--unk_penalty',
        type=float,
        default=0.0,
        help='Value to add for each UNK token',
    )

    args = parser.parse_args()

    if args.output_file == '':
        print('No action taken. Need output_file to be specified.')
        parser.print_help()
        return

    checkpoint_filenames = [arg[0] for arg in args.checkpoint]

    beam_search = BeamSearch.build_from_checkpoints(
        checkpoint_filenames=checkpoint_filenames,
        src_dict_filename=args.src_dict,
        dst_dict_filename=args.dst_dict,
        beam_size=args.beam_size,
        word_penalty=args.word_penalty,
        unk_penalty=args.unk_penalty,
    )
    beam_search.save_to_db(args.output_file)

def _test_beam_component_equivalence(self, test_args):
    beam_size = 5
    samples, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
    task = tasks.DictionaryHolderTask(src_dict, tgt_dict)

    num_models = 3
    model_list = []
    for _ in range(num_models):
        model_list.append(task.build_model(test_args))

    # to initialize BeamSearch object
    sample = next(samples)
    # [seq len, batch size=1]
    src_tokens = sample["net_input"]["src_tokens"][0:1].t()
    # [seq len]
    src_lengths = sample["net_input"]["src_lengths"][0:1].long()

    full_beam_search = BeamSearch(
        model_list, tgt_dict, src_tokens, src_lengths, beam_size=beam_size
    )

    encoder_ensemble = EncoderEnsemble(model_list)
    # to initialize decoder_step_ensemble
    with torch.no_grad():
        pytorch_encoder_outputs = encoder_ensemble(src_tokens, src_lengths)
    decoder_step_ensemble = DecoderBatchedStepEnsemble(
        model_list, tgt_dict, beam_size=beam_size
    )

    prev_token = torch.LongTensor([tgt_dict.eos()])
    prev_scores = torch.FloatTensor([0.0])
    attn_weights = torch.zeros(src_tokens.shape[0])
    prev_hypos_indices = torch.zeros(beam_size, dtype=torch.int64)
    num_steps = torch.LongTensor([2])

    with torch.no_grad():
        (
            bs_out_tokens,
            bs_out_scores,
            bs_out_weights,
            bs_out_prev_indices,
        ) = full_beam_search(
            src_tokens,
            src_lengths,
            prev_token,
            prev_scores,
            attn_weights,
            prev_hypos_indices,
            num_steps,
        )

    comp_out_tokens = (
        np.ones([num_steps + 1, beam_size], dtype="int64") * tgt_dict.eos()
    )
    comp_out_scores = np.zeros([num_steps + 1, beam_size])
    comp_out_weights = np.zeros(
        [num_steps + 1, beam_size, src_lengths.numpy()[0]]
    )
    comp_out_prev_indices = np.zeros([num_steps + 1, beam_size], dtype="int64")

    # single EOS in flat array
    input_tokens = torch.LongTensor(np.array([tgt_dict.eos()]))
    prev_scores = torch.FloatTensor(np.array([0.0]))
    timestep = torch.LongTensor(np.array([0]))

    with torch.no_grad():
        pytorch_first_step_outputs = decoder_step_ensemble(
            input_tokens, prev_scores, timestep, *pytorch_encoder_outputs
        )

    comp_out_tokens[1, :] = pytorch_first_step_outputs[0]
    comp_out_scores[1, :] = pytorch_first_step_outputs[1]
    comp_out_prev_indices[1, :] = pytorch_first_step_outputs[2]
    comp_out_weights[1, :, :] = pytorch_first_step_outputs[3]

    next_input_tokens = pytorch_first_step_outputs[0]
    next_prev_scores = pytorch_first_step_outputs[1]
    timestep += 1

    # Tile states after first timestep
    next_states = list(pytorch_first_step_outputs[4:])
    for i in range(len(model_list)):
        next_states[i] = next_states[i].repeat(1, beam_size, 1)

    with torch.no_grad():
        pytorch_next_step_outputs = decoder_step_ensemble(
            next_input_tokens, next_prev_scores, timestep, *next_states
        )

    comp_out_tokens[2, :] = pytorch_next_step_outputs[0]
    comp_out_scores[2, :] = pytorch_next_step_outputs[1]
    comp_out_prev_indices[2, :] = pytorch_next_step_outputs[2]
    comp_out_weights[2, :, :] = pytorch_next_step_outputs[3]

    np.testing.assert_array_equal(comp_out_tokens, bs_out_tokens.numpy())
    np.testing.assert_allclose(
        comp_out_scores, bs_out_scores.numpy(), rtol=1e-4, atol=1e-6
    )
    np.testing.assert_array_equal(
        comp_out_prev_indices, bs_out_prev_indices.numpy()
    )
    np.testing.assert_allclose(
        comp_out_weights, bs_out_weights.numpy(), rtol=1e-4, atol=1e-6
    )

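# A toy illustration (standalone, with hypothetical values; torch is assumed
# imported at module level as in the test above) of the state tiling done in
# _test_beam_component_equivalence: a [num_layers, batch=1, hidden] decoder
# state is repeated along the batch dimension so that each of the beam_size
# hypotheses starts from the same initial state after the first timestep.
def _state_tiling_example():
    state = torch.arange(6.0).reshape(1, 1, 6)  # [layers=1, batch=1, hidden=6]
    tiled = state.repeat(1, 5, 1)               # -> [1, beam_size=5, hidden=6]
    assert tiled.shape == (1, 5, 6)
    # Every beam shares the same initial state.
    assert torch.equal(tiled[0, 0], tiled[0, 4])
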
def _test_full_beam_decoder(self, test_args, quantize=False):
    samples, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
    task = tasks.DictionaryHolderTask(src_dict, tgt_dict)
    sample = next(samples)
    # [seq len, batch size=1]
    src_tokens = sample["net_input"]["src_tokens"][0:1].t()
    # [seq len]
    src_lengths = sample["net_input"]["src_lengths"][0:1].long()

    num_models = 3
    model_list = []
    for _ in range(num_models):
        model_list.append(task.build_model(test_args))

    length, word_length = 11, 7
    if test_args.arch in constants.ARCHS_FOR_CHAR_SOURCE:
        char_inds = torch.LongTensor(
            np.random.randint(0, 126, (1, length, word_length), dtype="int64")
        )
        word_lengths = torch.IntTensor(
            np.array([word_length] * length, dtype="int32")
        ).reshape((1, length))
    else:
        char_inds, word_lengths = None, None

    beam_size = 6
    bs = BeamSearch(
        model_list,
        tgt_dict,
        src_tokens,
        src_lengths,
        beam_size=beam_size,
        quantize=quantize,
        char_inds=char_inds,
        word_lengths=word_lengths,
    )
    f = io.BytesIO()
    bs.save_to_pytorch(f)

    # Test generalization with a different sequence length
    src_tokens = torch.LongTensor(
        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
    ).unsqueeze(1)
    src_lengths = torch.LongTensor([11])
    prev_token = torch.LongTensor([0])
    prev_scores = torch.FloatTensor([0.0])
    attn_weights = torch.zeros(src_tokens.shape[0])
    prev_hypos_indices = torch.zeros(beam_size, dtype=torch.int64)

    outs = bs(
        src_tokens,
        src_lengths,
        prev_token,
        prev_scores,
        attn_weights,
        prev_hypos_indices,
        torch.LongTensor([20]),
        char_inds=char_inds,
        word_lengths=word_lengths,
    )

    f.seek(0)
    deserialized_bs = torch.jit.load(f)
    deserialized_bs.apply(
        lambda s: s._unpack() if hasattr(s, "_unpack") else None
    )
    outs_deserialized = deserialized_bs(
        src_tokens,
        src_lengths,
        prev_token,
        prev_scores,
        attn_weights,
        prev_hypos_indices,
        torch.LongTensor([20]),
        char_inds=char_inds,
        word_lengths=word_lengths,
    )
    for a, b in zip(outs_deserialized, outs):
        np.testing.assert_allclose(a.detach().numpy(), b.detach().numpy())

def main():
    parser = argparse.ArgumentParser(
        description="Export PyTorch-trained FBTranslate models to caffe2"
    )
    parser.add_argument(
        "--checkpoint",
        action="append",
        nargs="+",
        help="PyTorch checkpoint file (at least one required)",
    )
    parser.add_argument(
        "--output_file",
        default="",
        help="File name to which to save beam search network",
    )
    parser.add_argument(
        "--src_dict",
        required=True,
        help="File encoding PyTorch dictionary for source language",
    )
    parser.add_argument(
        "--dst_dict",
        required=True,
        help="File encoding PyTorch dictionary for target language",
    )
    parser.add_argument(
        "--beam_size",
        type=int,
        default=6,
        help="Number of top candidates returned by each decoder step",
    )
    parser.add_argument(
        "--word_reward",
        type=float,
        default=0.0,
        help="Value to add for each word (besides EOS)",
    )
    parser.add_argument(
        "--unk_reward",
        type=float,
        default=0.0,
        help="Value to add for each UNK token",
    )

    args = parser.parse_args()

    if args.output_file == "":
        print("No action taken. Need output_file to be specified.")
        parser.print_help()
        return

    checkpoint_filenames = [arg[0] for arg in args.checkpoint]

    beam_search = BeamSearch.build_from_checkpoints(
        checkpoint_filenames=checkpoint_filenames,
        src_dict_filename=args.src_dict,
        dst_dict_filename=args.dst_dict,
        beam_size=args.beam_size,
        word_reward=args.word_reward,
        unk_reward=args.unk_reward,
    )
    beam_search.save_to_db(args.output_file)

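# A minimal usage sketch mirroring what main() above does when invoked from the
# command line; the checkpoint, dictionary, and output paths here are
# hypothetical placeholders, and `export_ensemble_example` is not part of the
# script itself.
def export_ensemble_example():
    beam_search = BeamSearch.build_from_checkpoints(
        checkpoint_filenames=["model1.pt", "model2.pt"],  # hypothetical checkpoints
        src_dict_filename="src.dict",                     # hypothetical source dict
        dst_dict_filename="dst.dict",                     # hypothetical target dict
        beam_size=6,
        word_reward=0.0,
        unk_reward=0.0,
    )
    beam_search.save_to_db("beam_search_model.db")        # hypothetical output path
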
def main():
    parser = argparse.ArgumentParser(
        description="Export PyTorch-trained FBTranslate models"
    )
    parser.add_argument(
        "--path",
        "--checkpoint",
        metavar="FILE",
        help="path(s) to model file(s), colon separated",
    )
    parser.add_argument(
        "--output-file",
        default="",
        help="File name to which to save beam search network",
    )
    parser.add_argument(
        "--output-graph-file",
        default="",
        help="File name to which to save the beam search graph for debugging",
    )
    parser.add_argument(
        "--src-dict",
        required=True,
        help="File encoding PyTorch dictionary for source language",
    )
    parser.add_argument(
        "--dst-dict",
        required=True,
        help="File encoding PyTorch dictionary for target language",
    )
    parser.add_argument(
        "--beam-size",
        type=int,
        default=6,
        help="Number of top candidates returned by each decoder step",
    )
    parser.add_argument(
        "--word-reward",
        type=float,
        default=0.0,
        help="Value to add for each word (besides EOS)",
    )
    parser.add_argument(
        "--unk-reward",
        type=float,
        default=0.0,
        help="Value to add for each UNK token",
    )

    args = parser.parse_args()

    if args.output_file == "":
        print("No action taken. Need output_file to be specified.")
        parser.print_help()
        return

    checkpoint_filenames = args.path.split(":")

    beam_search = BeamSearch.build_from_checkpoints(
        checkpoint_filenames=checkpoint_filenames,
        src_dict_filename=args.src_dict,
        dst_dict_filename=args.dst_dict,
        beam_size=args.beam_size,
        word_reward=args.word_reward,
        unk_reward=args.unk_reward,
    )
    beam_search.save_to_pytorch(output_path=args.output_file)

    if args.output_graph_file:
        # args.output_graph_file is already a path string, so open it directly.
        with open(args.output_graph_file, "w") as f:
            f.write(str(beam_search.graph))

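# A minimal sketch of consuming the file written by save_to_pytorch(); the
# load-and-run pattern follows the deserialization test above. The input
# tensors are placeholder values rather than real data, `run_exported_model`
# is a hypothetical helper, and torch is assumed imported at module level.
def run_exported_model(output_file, beam_size=6, max_steps=20):
    bs = torch.jit.load(output_file)
    src_tokens = torch.LongTensor([[1], [2], [3]])  # [seq len=3, batch size=1]
    src_lengths = torch.LongTensor([3])
    prev_token = torch.LongTensor([0])              # placeholder start token
    prev_scores = torch.FloatTensor([0.0])
    attn_weights = torch.zeros(src_tokens.shape[0])
    prev_hypos_indices = torch.zeros(beam_size, dtype=torch.int64)
    num_steps = torch.LongTensor([max_steps])
    return bs(
        src_tokens,
        src_lengths,
        prev_token,
        prev_scores,
        attn_weights,
        prev_hypos_indices,
        num_steps,
    )
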