def load_program_generator(path, model_type='PG+EE'):
    """Restore a program-generator model from a checkpoint file.

    Depending on ``model_type``, instantiate either a FiLMGen (with its
    kwargs filtered through ``get_updated_args``) or a plain Seq2Seq,
    load the saved weights, and hand back the model plus the kwargs used
    to build it.
    """
    ckpt = load_cpu(path)
    pg_kwargs = ckpt['program_generator_kwargs']
    pg_state = ckpt['program_generator_state']
    if model_type != 'FiLM':
        print('Loading PG from ' + path)
        model = Seq2Seq(**pg_kwargs)
    else:
        print('Loading FiLMGen from ' + path)
        pg_kwargs = get_updated_args(pg_kwargs, FiLMGen)
        model = FiLMGen(**pg_kwargs)
    model.load_state_dict(pg_state)
    return model, pg_kwargs
def load_program_generator(path, model_type="PG+EE"):
    """Load a program generator and its constructor kwargs from a checkpoint.

    A ``model_type`` of ``"FiLM"`` selects FiLMGen (kwargs refreshed via
    ``get_updated_args``); anything else selects Seq2Seq.
    """
    ckpt = load_cpu(path)
    gen_kwargs = ckpt["program_generator_kwargs"]
    gen_state = ckpt["program_generator_state"]
    is_film = model_type == "FiLM"
    if is_film:
        print("Loading FiLMGen from " + path)
        gen_kwargs = get_updated_args(gen_kwargs, FiLMGen)
    else:
        print("Loading PG from " + path)
    model = FiLMGen(**gen_kwargs) if is_film else Seq2Seq(**gen_kwargs)
    model.load_state_dict(gen_state)
    return model, gen_kwargs
def load_program_generator(path, model_type='PG+EE'):
    """Load a program generator from a checkpoint, stripping a wrapper prefix.

    For the non-FiLM path, the saved state-dict keys carry a leading
    component (e.g. ``module.`` from DataParallel — TODO confirm against
    the saving code); it is removed before loading into Seq2Seq.

    Returns the model and the kwargs it was constructed with.
    """
    checkpoint = load_cpu(path)
    kwargs = checkpoint['program_generator_kwargs']
    state = checkpoint['program_generator_state']
    if model_type == 'FiLM':
        print('Loading FiLMGen from ' + path)
        kwargs = get_updated_args(kwargs, FiLMGen)
        model = FiLMGen(**kwargs)
    else:
        print('Loading PG from ' + path)
        model = Seq2Seq(**kwargs)
    # BUG FIX: ``dict.iteritems()`` is Python 2 only; use ``.items()``.
    # Drop the first dotted component of every key ('' if the key has no dot,
    # matching the original '.'.join(k.split('.')[1:]) behavior).
    state_stemed = {'.'.join(k.split('.')[1:]): v for k, v in state.items()}
    model.load_state_dict(state_stemed)
    return model, kwargs
def load_program_generator(path):
    """Build the program generator recorded in a checkpoint.

    The checkpoint's own ``args['model_type']`` decides the class:
    FiLM-family types map to FiLMGen; ``PG``/``PG+EE`` map to either a
    Seq2seqParser (when the checkpoint was trained with ``ns_vqa``) or a
    Seq2SeqAtt; anything else yields ``None``. Saved weights are loaded
    whenever a model was built.
    """
    ckpt = load_cpu(path)
    mtype = ckpt['args']['model_type']
    pg_kwargs = ckpt['program_generator_kwargs']
    pg_state = ckpt['program_generator_state']

    model = None
    if mtype in ('FiLM', 'MAC', 'RelNet', 'Control-EE'):
        model = FiLMGen(**pg_kwargs)
    elif mtype in ('PG+EE', 'PG'):
        if ckpt['args']['ns_vqa']:
            model = Seq2seqParser(ckpt['vocab'])
        else:
            model = Seq2SeqAtt(**pg_kwargs)

    if model is not None:
        model.load_state_dict(pg_state)
    return model, pg_kwargs
def load_program_generator(path, model_type='PG+EE'):
    """Load a program generator checkpoint.

    For the FiLM case the saved state-dict keys have their first seven
    characters stripped (the ``module.`` prefix added by a wrapping
    module — presumably DataParallel; verify against the training code)
    before loading.
    """
    ckpt = load_cpu(path)
    gen_kwargs = ckpt['program_generator_kwargs']
    gen_state = ckpt['program_generator_state']
    if model_type == 'FiLM':
        print('Loading FiLMGen from ' + path)
        gen_kwargs = get_updated_args(gen_kwargs, FiLMGen)
        model = FiLMGen(**gen_kwargs)
        # Strip the 'module.' prefix from every key.
        stripped = OrderedDict((key[7:], val) for key, val in gen_state.items())
        gen_state = stripped
    else:
        print('Loading PG from ' + path)
        model = Seq2Seq(**gen_kwargs)
    model.load_state_dict(gen_state)
    return model, gen_kwargs
def load_program_generator(path):
    """Build and load the program generator described by a checkpoint.

    ``args['model_type']`` in the checkpoint selects the class: the
    FiLM family uses FiLMGen (kwargs refreshed via ``get_updated_args``);
    ``PG+EE`` uses Seq2SeqAtt when ``rnn_attention`` was set in the
    saved kwargs, plain Seq2Seq otherwise; other types return ``None``.
    """
    checkpoint = load_cpu(path)
    model_type = checkpoint['args']['model_type']
    kwargs = checkpoint['program_generator_kwargs']
    state = checkpoint['program_generator_state']
    if model_type in ['FiLM', 'MAC', 'RelNet']:
        kwargs = get_updated_args(kwargs, FiLMGen)
        model = FiLMGen(**kwargs)
    elif model_type == 'PG+EE':
        # BUG FIX: ``kwargs`` is a dict (it is splatted with ``**kwargs``
        # below), so attribute access ``kwargs.rnn_attention`` would raise
        # AttributeError. Use a key lookup; ``.get`` also tolerates old
        # checkpoints saved before the flag existed (falsy -> Seq2Seq).
        if kwargs.get('rnn_attention'):
            model = Seq2SeqAtt(**kwargs)
        else:
            model = Seq2Seq(**kwargs)
    else:
        model = None
    if model is not None:
        model.load_state_dict(state)
    return model, kwargs
def get_program_generator(vocab, args):
    """Create (or resume) the program generator for training.

    If ``args.program_generator_start_from`` is set, the model is loaded
    from that checkpoint and its encoder vocabulary is expanded to match
    ``vocab`` when sizes differ. Otherwise a fresh FiLMGen or Seq2Seq is
    built from ``args``. The model is moved to ``args.device`` and put
    into training mode; the construction kwargs are returned alongside it.
    """
    resume_path = args.program_generator_start_from
    q_vocab = vocab['question_token_to_idx']

    if resume_path is not None:
        pg, kwargs = utils.load_program_generator(
            resume_path, model_type=args.model_type)
        if pg.encoder_embed.weight.size(0) != len(q_vocab):
            print('Expanding vocabulary of program generator')
            pg.expand_encoder_vocab(q_vocab)
            kwargs['encoder_vocab_size'] = len(q_vocab)
    else:
        kwargs = {
            'encoder_vocab_size': len(q_vocab),
            'wordvec_dim': args.rnn_wordvec_dim,
            'hidden_dim': args.rnn_hidden_dim,
            'rnn_num_layers': args.rnn_num_layers,
            'rnn_dropout': args.rnn_dropout,
        }
        if args.model_type == 'FiLM':
            # FiLM-specific construction options; *_== 1 flags come in as ints.
            kwargs.update({
                'parameter_efficient':
                    args.program_generator_parameter_efficient == 1,
                'output_batchnorm': args.rnn_output_batchnorm == 1,
                'bidirectional': args.bidirectional == 1,
                'encoder_type': args.encoder_type,
                'decoder_type': args.decoder_type,
                'gamma_option': args.gamma_option,
                'gamma_baseline': args.gamma_baseline,
                'num_modules': args.num_modules,
                'module_num_layers': args.module_num_layers,
                'module_dim': args.module_dim,
                'debug_every': args.debug_every,
            })
            pg = FiLMGen(**kwargs)
        else:
            pg = Seq2Seq(**kwargs)

    pg = pg.to(device=args.device)
    pg.train()
    return pg, kwargs
if not os.path.exists(exp_dir): os.mkdir(exp_dir) logger = create_logger(os.path.join(exp_dir, 'log.txt')) logger.info(args) vocab = utils.load_vocab(os.path.join(args.data_dir, 'vocab.json')) film_gen = FiLMGen(encoder_vocab_size=len(vocab['question_token_to_idx']), wordvec_dim=args.rnn_wordvec_dim, hidden_dim=args.rnn_hidden_dim, rnn_num_layers=args.rnn_num_layers, rnn_dropout=0, output_batchnorm=False, bidirectional=False, encoder_type=args.encoder_type, decoder_type=args.decoder_type, gamma_option=args.gamma_option, gamma_baseline=1, num_modules=args.num_modules, module_num_layers=args.module_num_layers, module_dim=args.module_dim, parameter_efficient=True) film_gen = film_gen.cuda() filmed_net = FiLMedNet( vocab, feature_dim=(1024, 14, 14), stem_num_layers=args.module_stem_num_layers, stem_batchnorm=args.module_stem_batchnorm, stem_kernel_size=args.module_stem_kernel_size, stem_stride=1,