Example 1
    def __init__(self, opt, train_loader, val_loader, model, executor):
        self.opt = opt
        self.reinforce = opt.reinforce
        self.reward_decay = opt.reward_decay
        self.entropy_factor = opt.entropy_factor
        self.num_iters = opt.num_iters
        self.run_dir = opt.run_dir
        self.display_every = opt.display_every
        self.checkpoint_every = opt.checkpoint_every
        self.visualize_training = opt.visualize_training
        self.visualize_training_wandb = opt.visualize_training_wandb
        if opt.dataset == 'clevr':
            self.vocab = utils.load_vocab(opt.clevr_vocab_path)
        elif opt.dataset == 'clevr-humans':
            self.vocab = utils.load_vocab(opt.human_vocab_path)
        else:
            raise ValueError('Invalid dataset')

        self.train_loader = train_loader
        self.val_loader = val_loader
        self.model = model
        self.executor = executor

        # Create Optimizer #
        # _params_bline = list(filter(lambda p: p.requires_grad, model.seq2seq_baseline.parameters()))
        _params = list(
            filter(lambda p: p.requires_grad, model.seq2seq.parameters()))
        # Parameter sub-groups for the GNN and encoder (collected here but not
        # passed to the optimizer below, which updates all seq2seq parameters).
        _params_gnn = list(
            filter(lambda p: p.requires_grad, model.seq2seq.gnn.parameters()))
        _params_enc = list(
            filter(lambda p: p.requires_grad,
                   model.seq2seq.encoder.parameters()))
        # self.optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.seq2seq.parameters()),
        #                                   lr=opt.learning_rate)
        self.optimizer = torch.optim.Adam(_params, lr=opt.learning_rate)
        self.stats = {
            'train_losses': [],
            'train_batch_accs': [],
            'train_accs_ts': [],
            'val_losses': [],
            'val_accs': [],
            'val_accs_ts': [],
            'best_val_acc': -1,
            'model_t': 0
        }
        if opt.visualize_training:
            # Tensorboard #
            from reason.utils.logger import Logger
            self.logger = Logger('%s/logs' % opt.run_dir)

        if opt.visualize_training_wandb:
            # WandB: Log metrics with wandb #
            wandb_proj_name = opt.wandb_proj_name
            wandb_identifier = opt.run_identifier
            wandb_name = f"{wandb_identifier}"
            wandb.init(project=wandb_proj_name,
                       name=wandb_name,
                       notes="Running from mgn.reason.trainer.py")
            wandb.config.update(opt)
            wandb.watch(self.model.seq2seq)
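
The fields reward_decay and entropy_factor stored above are not used in this excerpt; in REINFORCE-style trainers for program generation they typically control an exponential-moving-average reward baseline and an entropy bonus on the decoder distribution. The sketch below only illustrates that general pattern and is not this trainer's actual update; log_probs, entropy, and rewards are hypothetical tensors.

def reinforce_loss(log_probs, entropy, rewards, baseline,
                   reward_decay=0.9, entropy_factor=0.01):
    """Illustrative REINFORCE objective with a moving-average baseline.

    log_probs: (batch,) summed log-probabilities of the sampled programs
    entropy:   (batch,) entropy of the decoder distribution
    rewards:   (batch,) execution rewards (e.g. 0/1 for wrong/right answer)
    baseline:  running scalar baseline, decayed with reward_decay
    """
    # Update the exponential-moving-average baseline with the mean batch reward.
    baseline = reward_decay * baseline + (1 - reward_decay) * rewards.mean().item()
    advantage = rewards - baseline
    # Maximize expected reward plus entropy => minimize the negative.
    loss = -(advantage.detach() * log_probs).mean() - entropy_factor * entropy.mean()
    return loss, baseline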
Example 2
def finetune(args):
    # Construct Solver
    # data
    token2idx_src, idx2token_src = load_vocab(args.vocab_src)
    token2idx_tgt, idx2token_tgt = load_vocab(args.vocab_tgt)
    args.n_src = len(idx2token_src)
    args.n_tgt = len(idx2token_tgt)

    tr_dataset = VQ_Pred_Dataset(args.train_src,
                                 args.train_tgt,
                                 token2idx_src,
                                 token2idx_tgt,
                                 args.batch_size,
                                 args.maxlen_in,
                                 args.maxlen_out,
                                 down_sample_rate=args.down_sample_rate)
    cv_dataset = VQ_Pred_Dataset(args.valid_src,
                                 args.valid_tgt,
                                 token2idx_src,
                                 token2idx_tgt,
                                 args.batch_size,
                                 args.maxlen_in,
                                 args.maxlen_out,
                                 down_sample_rate=args.down_sample_rate)
    tr_loader = DataLoader(tr_dataset,
                           batch_size=1,
                           collate_fn=f_xy_pad,
                           num_workers=args.num_workers,
                           shuffle=args.shuffle)
    cv_loader = DataLoader(cv_dataset,
                           batch_size=1,
                           collate_fn=f_xy_pad,
                           num_workers=args.num_workers)

    # load dictionary and generate char_list, sos_id, eos_id
    data = {'tr_loader': tr_loader, 'cv_loader': cv_loader}

    if args.structure == 'BERT':
        from mask_lm.Mask_LM import Mask_LM as Model
        from mask_lm.solver import Mask_LM_Solver as Solver

        model = Model.create_model(args)

    else:
        raise ValueError('Invalid structure: %s' % args.structure)

    print(model)
    model.cuda()

    # optimizer
    optimizer = TransformerOptimizer(
        torch.optim.Adam(model.parameters(), betas=(0.9, 0.98), eps=1e-09),
        args.k, args.d_model, args.warmup_steps)

    # solver
    solver = Solver(data, model, optimizer, args)
    solver.train()
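
TransformerOptimizer is not defined in this excerpt; given its (optimizer, k, d_model, warmup_steps) arguments it presumably applies the Noam warm-up schedule from "Attention Is All You Need". A minimal sketch of such a wrapper, under that assumption:

class NoamOpt:
    """Sketch of a warm-up learning-rate wrapper (assumed behaviour of TransformerOptimizer)."""

    def __init__(self, optimizer, k, d_model, warmup_steps):
        self.optimizer = optimizer
        self.k = k
        self.d_model = d_model
        self.warmup_steps = warmup_steps
        self.step_num = 0

    def zero_grad(self):
        self.optimizer.zero_grad()

    def step(self):
        # lr = k * d_model^-0.5 * min(step^-0.5, step * warmup^-1.5)
        self.step_num += 1
        lr = self.k * self.d_model ** -0.5 * min(
            self.step_num ** -0.5, self.step_num * self.warmup_steps ** -1.5)
        for group in self.optimizer.param_groups:
            group['lr'] = lr
        self.optimizer.step()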
Example 3
def get_vocab(opt):
    if opt.dataset == 'clevr':
        vocab_json = opt.clevr_vocab_path
    else:
        raise ValueError('Invalid dataset')
    vocab = utils.load_vocab(vocab_json)
    return vocab
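
utils.load_vocab is not shown here; in CLEVR-style codebases the vocab JSON conventionally stores token-to-index maps for questions, programs and answers. A hedged sketch of a compatible loader (the key names are an assumption, not confirmed by this excerpt):

import json

def load_vocab_sketch(path):
    # Assumed layout: {'question_token_to_idx': {...},
    #                  'program_token_to_idx': {...},
    #                  'answer_token_to_idx': {...}}
    with open(path) as f:
        vocab = json.load(f)
    # Build the inverse (index -> token) maps alongside the forward maps.
    for key in list(vocab.keys()):
        if key.endswith('_token_to_idx'):
            inv_key = key.replace('_token_to_idx', '_idx_to_token')
            vocab[inv_key] = {idx: tok for tok, idx in vocab[key].items()}
    return vocab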
Example 4
class DataPoint:
  # Load the vocabulary once at class-definition time, so it is shared by all instances.
  _vocab_path = os.path.join(
      FLAGS["data_dir"],
      "vocab.chatbot." + str(PROBLEM_HPARAMS["vocabulary_size"]))
  vocab_dict = load_vocab() if os.path.exists(_vocab_path) else {}

  def __init__(self, string, index, only_string=True):
    """
    Params:
      :string: String to be stored.
      :index: Number of the line in the file from which this sentence was read.
      :only_string: Whether to only store string.
    """
    # DataPoint has no base class here, so store the constructor arguments directly.
    self.string = string
    self.index = index
    self.only_string = only_string
    self.words = self.string.split()

    # Replace out of vocabulary words.
    for i, word in enumerate(self.words):
      if word not in DataPoint.vocab_dict:
        self.words[i] = "<unk>"
      self.words[i] = DataPoint.vocab_dict[self.words[i]]

    # Transform to counter.
    self.words = Counter(self.words)

  # Distance metric between this and another sentence.
  def distance(self, other_counter, dist_matrix):
    """
    Params:
      :other_counter: The other sentence to which we calculate distance.
      :dist_matrix: Distance matrix for all words in vocab.
    """
    def word_sum(self_counter, other_counter):
      # Compute distance in one way.
      dist_sum = 0
      for self_word in self_counter:
        minimum = 1  # word distances are assumed to lie in [0, 1]
        for other_word in other_counter:
          dist = dist_matrix[self_word, other_word]
          if dist < minimum:
            minimum = dist

        count = self_counter[self_word]
        dist_sum += count * minimum

      # Normalize.
      self_length = len(self_counter)
      if self_length != 0:
        dist_sum = dist_sum / self_length
      return dist_sum

    # Calculate the sums for the two sentences.
    first_sum = word_sum(self.words, other_counter.words)
    second_sum = word_sum(other_counter.words, self.words)
    return (first_sum + second_sum) / 2

  # Computes a similarity metric between two sentences.
  def similarity(self, other, dist_matrix):
    return -self.distance(other, dist_matrix)
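
dist_matrix is taken as given in the distance method above: a matrix of pairwise word distances indexed by vocabulary id, with values in [0, 1] (the minimum = 1 initialisation relies on that range). One way such a matrix could be built from word embeddings, as a hedged sketch; the embedding source is an assumption:

import numpy as np

def build_distance_matrix(embeddings):
    """embeddings: (vocab_size, dim) array of word vectors.

    Returns a (vocab_size, vocab_size) matrix of cosine distances scaled to [0, 1].
    """
    norms = np.linalg.norm(embeddings, axis=1, keepdims=True)
    unit = embeddings / np.maximum(norms, 1e-8)
    cosine_sim = unit @ unit.T          # in [-1, 1]
    dist = (1.0 - cosine_sim) / 2.0     # map to [0, 1]
    np.fill_diagonal(dist, 0.0)
    return dist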
Example 5
def main(args):
    # Construct Solver
    # data
    token2idx, idx2token = load_vocab(args.vocab)
    vocab_size = len(token2idx)

    tr_dataset = AudioDataset(args.train_json,
                              args.batch_size,
                              args.maxlen_in,
                              args.maxlen_out,
                              batch_frames=args.batch_frames)
    cv_dataset = AudioDataset(args.valid_json,
                              args.batch_size,
                              args.maxlen_in,
                              args.maxlen_out,
                              batch_frames=args.batch_frames)
    tr_loader = AudioDataLoader(tr_dataset,
                                batch_size=1,
                                token2idx=token2idx,
                                num_workers=args.num_workers,
                                shuffle=args.shuffle,
                                LFR_m=args.LFR_m,
                                LFR_n=args.LFR_n)
    cv_loader = AudioDataLoader(cv_dataset,
                                batch_size=1,
                                token2idx=token2idx,
                                num_workers=args.num_workers,
                                LFR_m=args.LFR_m,
                                LFR_n=args.LFR_n)
    # load dictionary and generate char_list, sos_id, eos_id
    data = {'tr_loader': tr_loader, 'cv_loader': cv_loader}
    # model
    encoder = Encoder(args.d_input * args.LFR_m,
                      args.n_layers_enc,
                      args.n_head,
                      args.d_k,
                      args.d_v,
                      args.d_model,
                      args.d_inner,
                      dropout=args.dropout,
                      pe_maxlen=args.pe_maxlen)
    decoder = Decoder(vocab_size, args.d_model)
    model = CTC_Model(encoder, decoder)
    print(model)
    model.cuda()
    # optimizer
    optimizer = CTCModelOptimizer(
        torch.optim.Adam(model.parameters(), betas=(0.9, 0.98), eps=1e-09),
        args.warmup_steps)

    # solver
    solver = CTC_Solver(data, model, optimizer, args)
    solver.train()
Example 6
    def __init__(self, question_h5_path, max_samples, vocab_json):
        self.max_samples = max_samples
        question_h5 = h5py.File(question_h5_path, 'r')
        self.questions = torch.LongTensor(
            np.asarray(question_h5['questions'], dtype=np.int64))
        self.image_idxs = np.asarray(question_h5['image_idxs'], dtype=np.int64)
        self.programs, self.answers = None, None
        if 'programs' in question_h5:
            self.programs = torch.LongTensor(
                np.asarray(question_h5['programs'], dtype=np.int64))
        if 'answers' in question_h5:
            self.answers = np.asarray(question_h5['answers'], dtype=np.int64)
        self.vocab = utils.load_vocab(vocab_json)
Example 7
    def __init__(self, opt, train_loader, val_loader, model, executor):
        self.opt = opt
        self.reinforce = opt.reinforce
        self.reward_decay = opt.reward_decay
        self.entropy_factor = opt.entropy_factor
        self.num_iters = opt.num_iters
        self.run_dir = opt.run_dir
        self.display_every = opt.display_every
        self.checkpoint_every = opt.checkpoint_every
        self.visualize_training = opt.visualize_training
        if opt.dataset == 'clevr':
            self.vocab = utils.load_vocab(opt.clevr_vocab_path)
        elif opt.dataset == 'clevr-humans':
            self.vocab = utils.load_vocab(opt.human_vocab_path)
        else:
            raise ValueError('Invalid dataset')

        self.train_loader = train_loader
        self.val_loader = val_loader
        self.model = model
        self.executor = executor
        self.optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                                 model.seq2seq.parameters()),
                                          lr=opt.learning_rate)

        self.stats = {
            'train_losses': [],
            'train_batch_accs': [],
            'train_accs_ts': [],
            'val_losses': [],
            'val_accs': [],
            'val_accs_ts': [],
            'best_val_acc': -1,
            'model_t': 0
        }
        if opt.visualize_training:
            from reason.utils.logger import Logger
            self.logger = Logger('%s/logs' % opt.run_dir)
Example 8
    def __init__(self, train_scene_json, val_scene_json, vocab_json):
        self.scenes = {
            'train': utils.load_scenes(train_scene_json),
            'val': utils.load_scenes(val_scene_json)
        }
        self.vocab = utils.load_vocab(vocab_json)
        self.colors = CLEVR_COLORS
        self.materials = CLEVR_MATERIALS
        self.shapes = CLEVR_SHAPES
        self.sizes = CLEVR_SIZES
        self.answer_candidates = CLEVR_ANSWER_CANDIDATES

        self.modules = {}
        self._register_modules()
Example 9
    def __init__(self, train_scene_json, val_scene_json, vocab_json, *args,
                 **kwargs):
        self.scenes = {
            'train': utils.load_scenes(train_scene_json),
            'val': utils.load_scenes(val_scene_json)
        }
        self.vocab = utils.load_vocab(vocab_json)
        self.colors = CLEVR_COLORS
        self.materials = CLEVR_MATERIALS
        self.shapes = CLEVR_SHAPES
        self.sizes = CLEVR_SIZES
        self.answer_candidates = CLEVR_ANSWER_CANDIDATES

        self.modules = {}
        self._register_modules()
        self.graph_parser = kwargs.get('graph_parser')
        self.embedder = kwargs.get('embedder')
Example 10
    def __init__(self, opt, split, *args, **kwargs):
        self.max_samples = (opt.max_train_samples
                            if split == 'train' else opt.max_val_samples)
        self.question_h5_path = (opt.clevr_train_question_path
                                 if split == 'train' else opt.clevr_val_question_path)
        vocab_json = opt.clevr_vocab_path
        self.vocab = utils.load_vocab(vocab_json)
        self.is_directed_graph = opt.is_directed_graph

        #### Init questions.h5 data - same fields as in the baseline (ques, progs, ans, img_idx) ####
        questions, programs, answers, image_idxs, orig_idxs, question_families = \
            utils.load_data_from_h5(self.question_h5_path)
        self.questions = questions
        self.programs = programs
        self.answers = answers
        self.image_idxs = image_idxs
        self.orig_idxs = orig_idxs
        self.question_families = question_families
        #### Init Graph Data: START ####
        self.graph_data = None
        # Try to load graph_data from preprocessed embeddings first.
        data_list = self._init_graph_data()
        if data_list:
            logger.info("Found preprocessed graph data via self._init_graph_data()")
            data_s_list, data_t_list = data_list
            self.graph_data = list(zip_longest(data_s_list, data_t_list))
        else:
            # Dynamically generate graph_data embeddings (skips the preprocessing
            # requirement). N.B. to force this flow, remove the corresponding
            # *_pairdata.pt file.
            logger.debug("Preprocessed graph data *_pairdata.pt not found, "
                         "dynamically generating graph data")
            logger.info("Dynamic graph data generation flow")
            # graph_parser and embedder are required for the dynamic flow.
            self.graph_parser = kwargs.get('graph_parser')
            self.embedder = kwargs.get('embedder')
            if self.graph_parser is None or self.embedder is None:
                raise ValueError('graph_parser and embedder are required for the dynamic flow')
            self.raw_question_path = (opt.clevr_train_raw_question_path
                                      if split == 'train' else opt.clevr_val_raw_question_path)
            self.parsed_img_scene_path = (opt.clevr_train_parsed_scene_path
                                          if split == 'train' else opt.clevr_val_parsed_scene_path)
            logger.debug(f"split: {split}, raw_question_path: {self.raw_question_path}, "
                         f"parsed_img_scene_path: {self.parsed_img_scene_path}")
            try:
                self.raw_questions = get_question_file(self.raw_question_path)
                self.img_scenes = get_img_scenes(self.parsed_img_scene_path)
            except FileNotFoundError as fne:
                logger.error(f"Raw questions.json or parsed image scenes not found: {fne}")
                raise
Example 11
def recognize(args):
    model, LFR_m, LFR_n = CTC_Model.load_model(args.model_path)
    print(model)
    model.eval()
    model.cuda()
    token2idx, idx2token = load_vocab(args.dict)
    blank_index = token2idx['<blk>']

    if args.beam_size == 1:
        from ctcModel.ctc_infer import GreedyDecoder

        decode = GreedyDecoder(space_idx=0, blank_index=blank_index)
    else:
        from ctcModel.ctc_infer import BeamDecoder

        decode = BeamDecoder(beam_width=args.beam_size,
                             blank_index=blank_index,
                             space_idx=0)

    # read json data
    with open(args.recog_json, 'rb') as f:
        js = json.load(f)['utts']

    # decode each utterance
    with torch.no_grad(), open(args.output, 'w') as f:
        for idx, name in enumerate(js.keys(), 1):
            print('(%d/%d) decoding %s' % (idx, len(js.keys()), name),
                  flush=True)
            input = kaldi_io.read_mat(js[name]['input'][0]['feat'])  # TxD
            input = build_LFR_features(input, LFR_m, LFR_n)
            input = torch.from_numpy(input).float()
            input_length = torch.tensor([input.size(0)], dtype=torch.int)
            input = input.cuda()
            input_length = input_length.cuda()
            hyps_ints = model.recognize(input, input_length, decode, args)
            hyp = ids2str(hyps_ints, idx2token)[0]
            f.write(name + ' ' + hyp + '\n')
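
build_LFR_features is used above but not shown; in Speech-Transformer-style front ends it applies low frame rate processing: stack LFR_m consecutive frames and keep every LFR_n-th stacked frame. A simplified sketch of that idea (the real implementation's handling of the final frames may differ):

import numpy as np

def build_lfr_features_sketch(inputs, m, n):
    """inputs: (T, D) feature matrix; stack m frames, subsample every n frames."""
    T, _ = inputs.shape
    stacked = []
    for t in range(0, T, n):
        frames = inputs[t:t + m]
        if frames.shape[0] < m:
            # Pad the tail by repeating the last frame.
            pad = np.tile(frames[-1:], (m - frames.shape[0], 1))
            frames = np.concatenate([frames, pad], axis=0)
        stacked.append(frames.reshape(-1))
    return np.stack(stacked)   # shape: (ceil(T / n), m * D)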
Example 12
def main(args):
    # Construct Solver
    # data
    token2idx, idx2token = load_vocab(args.vocab)
    args.vocab_size = len(token2idx)
    args.sos_id = token2idx['<sos>']
    args.eos_id = token2idx['<eos>']

    tr_dataset = AudioDataset(args.train_json,
                              args.batch_size,
                              args.maxlen_in,
                              args.maxlen_out,
                              batch_frames=args.batch_frames)
    cv_dataset = AudioDataset(args.valid_json,
                              args.batch_size,
                              args.maxlen_in,
                              args.maxlen_out,
                              batch_frames=args.batch_frames)
    tr_loader = AudioDataLoader(tr_dataset,
                                batch_size=1,
                                token2idx=token2idx,
                                label_type=args.label_type,
                                num_workers=args.num_workers,
                                shuffle=args.shuffle,
                                LFR_m=args.LFR_m,
                                LFR_n=args.LFR_n)
    cv_loader = AudioDataLoader(cv_dataset,
                                batch_size=1,
                                token2idx=token2idx,
                                label_type=args.label_type,
                                num_workers=args.num_workers,
                                LFR_m=args.LFR_m,
                                LFR_n=args.LFR_n)
    # load dictionary and generate char_list, sos_id, eos_id
    data = {'tr_loader': tr_loader, 'cv_loader': cv_loader}

    if args.structure == 'transformer':
        from transformer.Transformer import Transformer
        from transformer.solver import Transformer_Solver as Solver

        model = Transformer.create_model(args)

    elif args.structure == 'transformer-ctc':
        from transformer.Transformer import CTC_Transformer as Transformer
        from transformer.solver import Transformer_CTC_Solver as Solver

        model = Transformer.create_model(args)

    elif args.structure == 'conv-transformer-ctc':
        from transformer.Transformer import Conv_CTC_Transformer as Transformer
        from transformer.solver import Transformer_CTC_Solver as Solver

        model = Transformer.create_model(args)

    elif args.structure == 'cif':
        from transformer.CIF_Model import CIF_Model
        from transformer.solver import CIF_Solver as Solver

        model = CIF_Model.create_model(args)

    else:
        raise ValueError('Invalid structure: %s' % args.structure)

    print(model)
    model.cuda()

    # optimizer
    optimizer = TransformerOptimizer(
        torch.optim.Adam(model.parameters(), betas=(0.9, 0.98), eps=1e-09),
        args.k, args.d_model, args.warmup_steps)

    # solver
    solver = Solver(data, model, optimizer, args)
    solver.train()
Example 13
                        type=str,
                        default='./',
                        help='Path to save pretrain model.')

    return parser.parse_args()


if __name__ == '__main__':

    args = get_args()
    if args.cuda:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.cuda_num
        device = torch.device('cuda:0')
    else:
        device = torch.device('cpu')
    word2index, _ = load_vocab(args.vocab_path)

    show_info(epoch=args.epoch, vocab_size=len(word2index), USE_CUDA=args.cuda)

    train_source = lang(filelist=args.train_target,
                        word2index=word2index,
                        PAD=Constants.PAD_WORD,
                        EOS=Constants.EOS_WORD,
                        max_len=args.clip_length)

    train_target_inputs = lang(filelist=args.train_source,
                               word2index=word2index,
                               PAD=Constants.PAD_WORD,
                               BOS=Constants.BOS_WORD,
                               max_len=args.clip_length)
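
The two lang(...) calls above attach EOS to one stream and BOS to the other, which matches the usual seq2seq convention: the decoder is fed BOS-prefixed tokens and trained to predict the same tokens shifted by one position, ending in EOS. A minimal sketch of that pairing (lang itself is not shown, so this helper is hypothetical):

def make_decoder_pair(token_ids, bos_id, eos_id):
    """Return (decoder_input, decoder_target) for one sentence of token ids."""
    decoder_input = [bos_id] + token_ids      # what the decoder sees
    decoder_target = token_ids + [eos_id]     # what it is trained to predict
    return decoder_input, decoder_target

# Example: tokens [5, 9, 7] with bos_id=1, eos_id=2 ->
# input [1, 5, 9, 7], target [5, 9, 7, 2]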
Example 14

if __name__ == '__main__':

    args = get_args()
    if args.cuda:
        os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(args.cuda_num)
        if len(args.cuda_num) == 1:
            device_para = torch.device('cuda:0')
            device_back = torch.device('cuda:0')
        else:
            # With more than one visible GPU, place the second model on its own device.
            device_para = torch.device('cuda:0')
            device_back = torch.device('cuda:1')
    else:
        device_para = torch.device('cpu')
        device_back = torch.device('cpu')
    word2index, index2word = load_vocab(args.vocab_path)

    show_info(epoch=args.epoch, vocab_size=len(word2index), USE_CUDA=args.cuda)

    train_source = lang(filelist=args.train_source,
                        word2index=word2index,
                        PAD=Constants.PAD_WORD,
                        max_len=args.clip_length)

    train_target = lang(filelist=args.train_target,
                        word2index=word2index,
                        PAD=Constants.PAD_WORD,
                        max_len=args.clip_length)

    train_source_inputs = lang(filelist=args.train_source,
                               word2index=word2index,
Example 15
def test(args):
    if args.structure == 'transformer':
        from transformer.Transformer import Transformer as Model
    elif args.structure == 'transformer-ctc':
        from transformer.Transformer import CTC_Transformer as Model
    elif args.structure == 'conv-transformer-ctc':
        from transformer.Transformer import Conv_CTC_Transformer as Model
    elif args.structure == 'cif':
        from transformer.CIF_Model import CIF_Model as Model
    else:
        raise ValueError('Invalid structure: %s' % args.structure)

    token2idx, idx2token = load_vocab(args.vocab)
    args.sos_id = token2idx['<sos>']
    args.eos_id = token2idx['<eos>']
    args.vocab_size = len(token2idx)

    model = Model.load_model(args.model_path, args)
    print(model)
    model.eval()
    model.cuda()

    # read json data
    with open(args.recog_json, 'rb') as f:
        js = json.load(f)['utts']

    cur_time = time.time()
    # decode each utterance

    test_dataset = AudioDataset('/home/easton/projects/OpenASR/egs/aishell1/data/test.json',
                                token2idx, frames_size=1000,
                                len_in_max=1999, len_out_max=99)
    test_loader = DataLoader(test_dataset, batch_size=1,
                             collate_fn=batch_generator(),
                             num_workers=args.num_workers)
    # test_loader = AudioDataLoader(test_dataset, batch_size=1,
    #                             token2idx=token2idx,
    #                             label_type=args.label_type,
    #                             num_workers=args.num_workers,
    #                             LFR_m=args.LFR_m, LFR_n=args.LFR_n)

    def process_batch(uttids, hyps, scores, idx2token, fw):
        for uttid, nbest, nscore in zip(uttids, hyps, scores):
            for n, (hyp, score) in enumerate(zip(nbest, nscore)):
                hyp = hyp.tolist()
                try:
                    eos = hyp.index(args.eos_id)
                except ValueError:
                    eos = None

                hyp = ''.join(idx2token[i] for i in hyp[:eos])
                print("top{}: {} score: {:.3f}\n".format(n + 1, hyp, score))
                if n == 0:
                    # Write the best hypothesis together with its utterance id.
                    fw.write("{} {}\n".format(uttid, hyp))

    with torch.no_grad(), open(args.output, 'w') as fw:
        for data in test_loader:
            uttids, xs_pad, len_xs, ys_pad, len_ys = data
            xs_pad = xs_pad.cuda()
            ys_pad = ys_pad.cuda()
            hyps_ints, len_decoded_sorted, scores = model.batch_recognize(
                xs_pad, len_xs, args.beam_size)

            process_batch(uttids, hyps_ints.cpu().numpy(), scores.cpu().numpy(),
                          idx2token, fw)