def __init__(self, opt, train_loader, val_loader, model, executor):
    self.opt = opt
    self.reinforce = opt.reinforce
    self.reward_decay = opt.reward_decay
    self.entropy_factor = opt.entropy_factor
    self.num_iters = opt.num_iters
    self.run_dir = opt.run_dir
    self.display_every = opt.display_every
    self.checkpoint_every = opt.checkpoint_every
    self.visualize_training = opt.visualize_training
    self.visualize_training_wandb = opt.visualize_training_wandb
    if opt.dataset == 'clevr':
        self.vocab = utils.load_vocab(opt.clevr_vocab_path)
    elif opt.dataset == 'clevr-humans':
        self.vocab = utils.load_vocab(opt.human_vocab_path)
    else:
        raise ValueError('Invalid dataset')

    self.train_loader = train_loader
    self.val_loader = val_loader
    self.model = model
    self.executor = executor

    # Create optimizer over all trainable seq2seq parameters.
    _params = list(
        filter(lambda p: p.requires_grad, model.seq2seq.parameters()))
    # Parameter subsets collected for optional per-module learning rates (unused here).
    _params_gnn = list(
        filter(lambda p: p.requires_grad, model.seq2seq.gnn.parameters()))
    _params_enc = list(
        filter(lambda p: p.requires_grad, model.seq2seq.encoder.parameters()))
    self.optimizer = torch.optim.Adam(_params, lr=opt.learning_rate)

    self.stats = {
        'train_losses': [],
        'train_batch_accs': [],
        'train_accs_ts': [],
        'val_losses': [],
        'val_accs': [],
        'val_accs_ts': [],
        'best_val_acc': -1,
        'model_t': 0
    }

    if opt.visualize_training:
        # TensorBoard logging.
        from reason.utils.logger import Logger
        self.logger = Logger('%s/logs' % opt.run_dir)
    if opt.visualize_training_wandb:
        # WandB: log metrics and watch gradients.
        wandb_proj_name = opt.wandb_proj_name
        wandb_identifier = opt.run_identifier
        wandb_name = f"{wandb_identifier}"
        wandb.init(project=wandb_proj_name, name=wandb_name,
                   notes="Running from mgn.reason.trainer.py")
        wandb.config.update(opt)
        wandb.watch(self.model.seq2seq)
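# Aside (not from the source): a minimal sketch of why the requires_grad
# filter above matters. Freezing a submodule before constructing the optimizer
# keeps its weights out of the Adam updates entirely; the module path below
# (model.seq2seq.encoder) mirrors the snippet above, everything else is assumed.
import torch

for p in model.seq2seq.encoder.parameters():
    p.requires_grad = False  # freeze the encoder
trainable = [p for p in model.seq2seq.parameters() if p.requires_grad]
optimizer = torch.optim.Adam(trainable, lr=opt.learning_rate)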
def finetune(args):
    # Construct Solver
    # data
    token2idx_src, idx2token_src = load_vocab(args.vocab_src)
    token2idx_tgt, idx2token_tgt = load_vocab(args.vocab_tgt)
    args.n_src = len(idx2token_src)
    args.n_tgt = len(idx2token_tgt)
    tr_dataset = VQ_Pred_Dataset(args.train_src, args.train_tgt,
                                 token2idx_src, token2idx_tgt,
                                 args.batch_size, args.maxlen_in, args.maxlen_out,
                                 down_sample_rate=args.down_sample_rate)
    cv_dataset = VQ_Pred_Dataset(args.valid_src, args.valid_tgt,
                                 token2idx_src, token2idx_tgt,
                                 args.batch_size, args.maxlen_in, args.maxlen_out,
                                 down_sample_rate=args.down_sample_rate)
    tr_loader = DataLoader(tr_dataset, batch_size=1,
                           collate_fn=f_xy_pad,
                           num_workers=args.num_workers,
                           shuffle=args.shuffle)
    cv_loader = DataLoader(cv_dataset, batch_size=1,
                           collate_fn=f_xy_pad,
                           num_workers=args.num_workers)
    data = {'tr_loader': tr_loader, 'cv_loader': cv_loader}

    if args.structure == 'BERT':
        from mask_lm.Mask_LM import Mask_LM as Model
        from mask_lm.solver import Mask_LM_Solver as Solver
    else:
        raise ValueError('Unsupported structure: %s' % args.structure)

    model = Model.create_model(args)
    print(model)
    model.cuda()

    # optimizer
    optimizer = TransformerOptimizer(
        torch.optim.Adam(model.parameters(), betas=(0.9, 0.98), eps=1e-09),
        args.k, args.d_model, args.warmup_steps)

    # solver
    solver = Solver(data, model, optimizer, args)
    solver.train()
def get_vocab(opt):
    if opt.dataset == 'clevr':
        vocab_json = opt.clevr_vocab_path
    else:
        raise ValueError('Invalid dataset')
    vocab = utils.load_vocab(vocab_json)
    return vocab
class DataPoint:
    # Class-level vocabulary, loaded once if the vocab file exists.
    if os.path.exists(
            os.path.join(FLAGS["data_dir"],
                         "vocab.chatbot." + str(PROBLEM_HPARAMS["vocabulary_size"]))):
        vocab_dict = load_vocab()

    def __init__(self, string, index, only_string=True):
        """
        Params:
          :string: String to be stored.
          :index: Number of the line in the file from which this sentence was read.
          :only_string: Whether to only store the string.
        """
        self.string = string
        self.index = index
        self.only_string = only_string
        self.words = self.string.split()

        # Replace out-of-vocabulary words, then map each word to its vocab id.
        for i, word in enumerate(self.words):
            if word not in DataPoint.vocab_dict:
                self.words[i] = "<unk>"
            self.words[i] = DataPoint.vocab_dict[self.words[i]]

        # Transform to a Counter of word ids.
        self.words = Counter(self.words)

    def distance(self, other_counter, dist_matrix):
        """Distance metric between this and another sentence.

        Params:
          :other_counter: The other sentence to which we calculate distance.
          :dist_matrix: Distance matrix for all words in the vocab.
        """
        def word_sum(self_counter, other_counter):
            # Compute the directed distance from one sentence to the other:
            # for each word, take its distance to the closest word on the
            # other side, weighted by its count.
            dist_sum = 0
            for self_word in self_counter:
                minimum = 1
                for other_word in other_counter:
                    dist = dist_matrix[self_word, other_word]
                    if dist < minimum:
                        minimum = dist
                count = self_counter[self_word]
                dist_sum += count * minimum

            # Normalize by sentence length.
            self_length = len(self_counter)
            if self_length != 0:
                dist_sum = dist_sum / self_length
            return dist_sum

        # Symmetrize: average the two directed sums.
        first_sum = word_sum(self.words, other_counter.words)
        second_sum = word_sum(other_counter.words, self.words)
        return (first_sum + second_sum) / 2

    def similarity(self, other, dist_matrix):
        """Similarity metric between two sentences: the negated distance."""
        return -self.distance(other, dist_matrix)
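# Usage sketch (not from the source): exercises DataPoint.distance/similarity
# with a toy 3-entry vocab. The vocab contents, the sentences, and toy_dist
# are all made up for illustration; dist_matrix just needs to be indexable by
# pairs of word ids with values in [0, 1].
import numpy as np

DataPoint.vocab_dict = {"hello": 0, "world": 1, "<unk>": 2}  # demo override
a = DataPoint("hello world", index=0)
b = DataPoint("hello hello", index=1)
toy_dist = np.array([[0.0, 0.8, 1.0],
                     [0.8, 0.0, 1.0],
                     [1.0, 1.0, 0.0]])
print(a.similarity(b, toy_dist))  # values nearer 0 mean more similar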
def main(args):
    # Construct Solver
    # data
    token2idx, idx2token = load_vocab(args.vocab)
    vocab_size = len(token2idx)
    tr_dataset = AudioDataset(args.train_json, args.batch_size,
                              args.maxlen_in, args.maxlen_out,
                              batch_frames=args.batch_frames)
    cv_dataset = AudioDataset(args.valid_json, args.batch_size,
                              args.maxlen_in, args.maxlen_out,
                              batch_frames=args.batch_frames)
    tr_loader = AudioDataLoader(tr_dataset, batch_size=1,
                                token2idx=token2idx,
                                num_workers=args.num_workers,
                                shuffle=args.shuffle,
                                LFR_m=args.LFR_m, LFR_n=args.LFR_n)
    cv_loader = AudioDataLoader(cv_dataset, batch_size=1,
                                token2idx=token2idx,
                                num_workers=args.num_workers,
                                LFR_m=args.LFR_m, LFR_n=args.LFR_n)
    data = {'tr_loader': tr_loader, 'cv_loader': cv_loader}

    # model
    encoder = Encoder(args.d_input * args.LFR_m, args.n_layers_enc, args.n_head,
                      args.d_k, args.d_v, args.d_model, args.d_inner,
                      dropout=args.dropout, pe_maxlen=args.pe_maxlen)
    decoder = Decoder(vocab_size, args.d_model)
    model = CTC_Model(encoder, decoder)
    print(model)
    model.cuda()

    # optimizer
    optimizer = CTCModelOptimizer(
        torch.optim.Adam(model.parameters(), betas=(0.9, 0.98), eps=1e-09),
        args.warmup_steps)

    # solver
    solver = CTC_Solver(data, model, optimizer, args)
    solver.train()
def __init__(self, question_h5_path, max_samples, vocab_json):
    self.max_samples = max_samples
    question_h5 = h5py.File(question_h5_path, 'r')
    self.questions = torch.LongTensor(
        np.asarray(question_h5['questions'], dtype=np.int64))
    self.image_idxs = np.asarray(question_h5['image_idxs'], dtype=np.int64)
    self.programs, self.answers = None, None
    if 'programs' in question_h5:
        self.programs = torch.LongTensor(
            np.asarray(question_h5['programs'], dtype=np.int64))
    if 'answers' in question_h5:
        self.answers = np.asarray(question_h5['answers'], dtype=np.int64)
    self.vocab = utils.load_vocab(vocab_json)
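# Usage sketch (not from the source): assuming the __init__ above belongs to a
# torch.utils.data.Dataset subclass -- hypothetically named
# ClevrQuestionDataset -- and that the paths below exist.
dataset = ClevrQuestionDataset('data/train_questions.h5',
                               max_samples=None,
                               vocab_json='data/vocab.json')
print(dataset.questions.shape)   # (num_questions, max_question_len)
print(dataset.image_idxs[:5])    # image index for each question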
def __init__(self, opt, train_loader, val_loader, model, executor):
    self.opt = opt
    self.reinforce = opt.reinforce
    self.reward_decay = opt.reward_decay
    self.entropy_factor = opt.entropy_factor
    self.num_iters = opt.num_iters
    self.run_dir = opt.run_dir
    self.display_every = opt.display_every
    self.checkpoint_every = opt.checkpoint_every
    self.visualize_training = opt.visualize_training
    if opt.dataset == 'clevr':
        self.vocab = utils.load_vocab(opt.clevr_vocab_path)
    elif opt.dataset == 'clevr-humans':
        self.vocab = utils.load_vocab(opt.human_vocab_path)
    else:
        raise ValueError('Invalid dataset')

    self.train_loader = train_loader
    self.val_loader = val_loader
    self.model = model
    self.executor = executor
    self.optimizer = torch.optim.Adam(
        filter(lambda p: p.requires_grad, model.seq2seq.parameters()),
        lr=opt.learning_rate)
    self.stats = {
        'train_losses': [],
        'train_batch_accs': [],
        'train_accs_ts': [],
        'val_losses': [],
        'val_accs': [],
        'val_accs_ts': [],
        'best_val_acc': -1,
        'model_t': 0
    }
    if opt.visualize_training:
        from reason.utils.logger import Logger
        self.logger = Logger('%s/logs' % opt.run_dir)
def __init__(self, train_scene_json, val_scene_json, vocab_json):
    self.scenes = {
        'train': utils.load_scenes(train_scene_json),
        'val': utils.load_scenes(val_scene_json)
    }
    self.vocab = utils.load_vocab(vocab_json)
    self.colors = CLEVR_COLORS
    self.materials = CLEVR_MATERIALS
    self.shapes = CLEVR_SHAPES
    self.sizes = CLEVR_SIZES
    self.answer_candidates = CLEVR_ANSWER_CANDIDATES
    self.modules = {}
    self._register_modules()
def __init__(self, train_scene_json, val_scene_json, vocab_json, *args, **kwargs):
    self.scenes = {
        'train': utils.load_scenes(train_scene_json),
        'val': utils.load_scenes(val_scene_json)
    }
    self.vocab = utils.load_vocab(vocab_json)
    self.colors = CLEVR_COLORS
    self.materials = CLEVR_MATERIALS
    self.shapes = CLEVR_SHAPES
    self.sizes = CLEVR_SIZES
    self.answer_candidates = CLEVR_ANSWER_CANDIDATES
    self.modules = {}
    self._register_modules()
    self.graph_parser = kwargs.get('graph_parser')
    self.embedder = kwargs.get('embedder')
def __init__(self, opt, split, *args, **kwargs):
    self.max_samples = opt.max_train_samples if split == 'train' \
        else opt.max_val_samples
    self.question_h5_path = opt.clevr_train_question_path if split == 'train' \
        else opt.clevr_val_question_path
    vocab_json = opt.clevr_vocab_path
    self.vocab = utils.load_vocab(vocab_json)
    self.is_directed_graph = opt.is_directed_graph

    # Init questions.h5 data -- invariant: the same data as in the baseline
    # (questions, programs, answers, image indices).
    questions, programs, answers, image_idxs, orig_idxs, question_families = \
        utils.load_data_from_h5(self.question_h5_path)
    self.questions = questions
    self.programs = programs
    self.answers = answers
    self.image_idxs = image_idxs
    self.orig_idxs = orig_idxs
    self.question_families = question_families

    # Init graph data: try the preprocessed-embedding flow first.
    self.graph_data = None
    data_list = self._init_graph_data()  # load graph_data from preprocessed embeddings
    if data_list:
        logger.info("Found preprocessed graph data: self._init_graph_data(..)")
        data_s_list, data_t_list = data_list
        self.graph_data = list(zip_longest(data_s_list, data_t_list))
    else:
        # Dynamically generate graph-data embeddings, skipping the
        # preprocessing requirement (to force this path, remove the
        # corresponding *_pairdata.pt file).
        logger.debug("Preprocessed graph data *_pairdata.pt not found, "
                     "dynamically generating graph data")
        logger.info("Dynamic graph data generation flow")
        # The following must be non-None for the dynamic flow.
        self.graph_parser = kwargs.get('graph_parser')
        self.embedder = kwargs.get('embedder')
        self.raw_question_path = opt.clevr_train_raw_question_path if split == 'train' \
            else opt.clevr_val_raw_question_path
        self.parsed_img_scene_path = opt.clevr_train_parsed_scene_path if split == 'train' \
            else opt.clevr_val_parsed_scene_path
        logger.debug(f"split: {split}, raw_question_path: {self.raw_question_path}, "
                     f"parsed_img_scene_path: {self.parsed_img_scene_path}")
        try:
            self.raw_questions = get_question_file(self.raw_question_path)
            self.img_scenes = get_img_scenes(self.parsed_img_scene_path)
        except FileNotFoundError as fne:
            logger.error(f"Raw questions.json or parsed image scenes not found: {fne}")
def recognize(args):
    model, LFR_m, LFR_n = CTC_Model.load_model(args.model_path)
    print(model)
    model.eval()
    model.cuda()
    token2idx, idx2token = load_vocab(args.dict)
    blank_index = token2idx['<blk>']
    if args.beam_size == 1:
        from ctcModel.ctc_infer import GreedyDecoder
        decode = GreedyDecoder(space_idx=0, blank_index=blank_index)
    else:
        from ctcModel.ctc_infer import BeamDecoder
        decode = BeamDecoder(beam_width=args.beam_size,
                             blank_index=blank_index, space_idx=0)

    # read json data
    with open(args.recog_json, 'rb') as f:
        js = json.load(f)['utts']

    # decode each utterance
    with torch.no_grad(), open(args.output, 'w') as f:
        for idx, name in enumerate(js.keys(), 1):
            print('(%d/%d) decoding %s' % (idx, len(js.keys()), name), flush=True)
            feats = kaldi_io.read_mat(js[name]['input'][0]['feat'])  # TxD
            feats = build_LFR_features(feats, LFR_m, LFR_n)
            feats = torch.from_numpy(feats).float()
            feats_length = torch.tensor([feats.size(0)], dtype=torch.int)
            feats = feats.cuda()
            feats_length = feats_length.cuda()
            hyps_ints = model.recognize(feats, feats_length, decode, args)
            hyp = ids2str(hyps_ints, idx2token)[0]
            f.write(name + ' ' + hyp + '\n')
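# Aside (not from the source): build_LFR_features is not shown in this
# section. In Speech-Transformer-style pipelines it is commonly a
# low-frame-rate transform that stacks every m consecutive frames and keeps
# every n-th stacked frame, shortening the sequence. A rough sketch under
# that assumption (the real helper's edge handling may differ):
import numpy as np

def lfr_stack(feats, m, n):
    T, D = feats.shape
    out = []
    for i in range(0, T, n):
        chunk = feats[i:i + m]
        if chunk.shape[0] < m:  # pad the tail by repeating the last frame
            pad = np.tile(feats[-1], (m - chunk.shape[0], 1))
            chunk = np.vstack([chunk, pad])
        out.append(chunk.reshape(-1))
    return np.stack(out)  # shape: (ceil(T/n), D * m)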
def main(args):
    # Construct Solver
    # data
    token2idx, idx2token = load_vocab(args.vocab)
    args.vocab_size = len(token2idx)
    args.sos_id = token2idx['<sos>']
    args.eos_id = token2idx['<eos>']
    tr_dataset = AudioDataset(args.train_json, args.batch_size,
                              args.maxlen_in, args.maxlen_out,
                              batch_frames=args.batch_frames)
    cv_dataset = AudioDataset(args.valid_json, args.batch_size,
                              args.maxlen_in, args.maxlen_out,
                              batch_frames=args.batch_frames)
    tr_loader = AudioDataLoader(tr_dataset, batch_size=1,
                                token2idx=token2idx,
                                label_type=args.label_type,
                                num_workers=args.num_workers,
                                shuffle=args.shuffle,
                                LFR_m=args.LFR_m, LFR_n=args.LFR_n)
    cv_loader = AudioDataLoader(cv_dataset, batch_size=1,
                                token2idx=token2idx,
                                label_type=args.label_type,
                                num_workers=args.num_workers,
                                LFR_m=args.LFR_m, LFR_n=args.LFR_n)
    data = {'tr_loader': tr_loader, 'cv_loader': cv_loader}

    if args.structure == 'transformer':
        from transformer.Transformer import Transformer
        from transformer.solver import Transformer_Solver as Solver
        model = Transformer.create_model(args)
    elif args.structure == 'transformer-ctc':
        from transformer.Transformer import CTC_Transformer as Transformer
        from transformer.solver import Transformer_CTC_Solver as Solver
        model = Transformer.create_model(args)
    elif args.structure == 'conv-transformer-ctc':
        from transformer.Transformer import Conv_CTC_Transformer as Transformer
        from transformer.solver import Transformer_CTC_Solver as Solver
        model = Transformer.create_model(args)
    elif args.structure == 'cif':
        from transformer.CIF_Model import CIF_Model
        from transformer.solver import CIF_Solver as Solver
        model = CIF_Model.create_model(args)
    else:
        raise ValueError('Unsupported structure: %s' % args.structure)

    print(model)
    model.cuda()

    # optimizer
    optimizer = TransformerOptimizer(
        torch.optim.Adam(model.parameters(), betas=(0.9, 0.98), eps=1e-09),
        args.k, args.d_model, args.warmup_steps)

    # solver
    solver = Solver(data, model, optimizer, args)
    solver.train()
        type=str, default='./', help='Path to save pretrain model.')
    return parser.parse_args()


if __name__ == '__main__':
    args = get_args()
    if args.cuda:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.cuda_num
        device = torch.device('cuda:0')
    else:
        device = torch.device('cpu')
    word2index, _ = load_vocab(args.vocab_path)
    show_info(epoch=args.epoch, vocab_size=len(word2index), USE_CUDA=args.cuda)
    # Note: the "source" side is built from the target filelist here, and the
    # target inputs from the source filelist.
    train_source = lang(filelist=args.train_target,
                        word2index=word2index,
                        PAD=Constants.PAD_WORD,
                        EOS=Constants.EOS_WORD,
                        max_len=args.clip_length)
    train_target_inputs = lang(filelist=args.train_source,
                               word2index=word2index,
                               PAD=Constants.PAD_WORD,
                               BOS=Constants.BOS_WORD,
                               max_len=args.clip_length)
if __name__ == '__main__':
    args = get_args()
    if args.cuda:
        os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(args.cuda_num)
        if len(args.cuda_num) == 1:
            device_para = torch.device('cuda:0')
            device_back = torch.device('cuda:0')
        else:
            device_para = torch.device('cuda:0')
            # Assumption: with multiple visible GPUs, the second model goes
            # on the second device.
            device_back = torch.device('cuda:1')
    else:
        device = torch.device('cpu')
    word2index, index2word = load_vocab(args.vocab_path)
    show_info(epoch=args.epoch, vocab_size=len(word2index), USE_CUDA=args.cuda)
    train_source = lang(filelist=args.train_source,
                        word2index=word2index,
                        PAD=Constants.PAD_WORD,
                        max_len=args.clip_length)
    train_target = lang(filelist=args.train_target,
                        word2index=word2index,
                        PAD=Constants.PAD_WORD,
                        max_len=args.clip_length)
    train_source_inputs = lang(filelist=args.train_source,
                               word2index=word2index,
def test(args):
    if args.structure == 'transformer':
        from transformer.Transformer import Transformer as Model
    elif args.structure == 'transformer-ctc':
        from transformer.Transformer import CTC_Transformer as Model
    elif args.structure == 'conv-transformer-ctc':
        from transformer.Transformer import Conv_CTC_Transformer as Model
    elif args.structure == 'cif':
        from transformer.CIF_Model import CIF_Model as Model
    else:
        raise ValueError('Unsupported structure: %s' % args.structure)

    token2idx, idx2token = load_vocab(args.vocab)
    args.sos_id = token2idx['<sos>']
    args.eos_id = token2idx['<eos>']
    args.vocab_size = len(token2idx)

    model = Model.load_model(args.model_path, args)
    print(model)
    model.eval()
    model.cuda()

    # read json data
    with open(args.recog_json, 'rb') as f:
        js = json.load(f)['utts']

    cur_time = time.time()
    # decode each utterance
    test_dataset = AudioDataset(
        '/home/easton/projects/OpenASR/egs/aishell1/data/test.json',
        token2idx, frames_size=1000, len_in_max=1999, len_out_max=99)
    test_loader = DataLoader(test_dataset, batch_size=1,
                             collate_fn=batch_generator(),
                             num_workers=args.num_workers)

    def process_batch(uttids, hyps, scores, idx2token, fw):
        # For each utterance, print the n-best hypotheses and write the 1-best.
        for uttid, nbest, nscore in zip(uttids, hyps, scores):
            for n, (hyp, score) in enumerate(zip(nbest, nscore)):
                hyp = hyp.tolist()
                try:
                    eos = hyp.index(args.eos_id)  # truncate at <eos>
                except ValueError:
                    eos = None
                hyp = ''.join(idx2token[i] for i in hyp[:eos])
                print("top{}: {} score: {:.3f}\n".format(n + 1, hyp, score))
                if n == 0:
                    fw.write("{} {}\n".format(uttid, hyp))

    with torch.no_grad(), open(args.output, 'w') as fw:
        for data in test_loader:
            uttids, xs_pad, len_xs, ys_pad, len_ys = data
            xs_pad = xs_pad.cuda()
            ys_pad = ys_pad.cuda()
            hyps_ints, len_decoded_sorted, scores = model.batch_recognize(
                xs_pad, len_xs, args.beam_size)
            process_batch(uttids, hyps_ints.cpu().numpy(),
                          scores.cpu().numpy(), idx2token, fw)