def validate(model, data, device, detail=False):
    count, correct = 0, 0
    model.eval()
    details = {cat: [0, 0] for cat in
               {'count', 'compare number', 'exist', 'query', 'compare attribute'}}
    print('validate...')
    for batch in tqdm(data, total=len(data)):
        answers, questions, *batch_input = [todevice(x, device) for x in batch]
        logits = model(*batch_input)
        predicts = logits.max(1)[1]
        correct += torch.eq(predicts, answers).long().sum().item()
        count += answers.size(0)
        if detail:
            programs = batch_input[0]
            for i in range(len(answers)):
                for j in range(len(programs[i])):
                    program = data.vocab['program_idx_to_token'][programs[i][j].item()]
                    if program in ['<NULL>', '<START>', '<END>', '<UNK>', 'unique']:
                        continue
                    cat = map_program_to_cat[program]
                    details[cat][0] += int(predicts[i].item() == answers[i].item())
                    details[cat][1] += 1
                    break
    acc = correct / count
    if detail:
        details = {k: (v[0] / v[1]) for k, v in details.items()}
        return acc, details
    return acc
def train(args):
    logging.info("Create train_loader and val_loader.........")
    train_loader_kwargs = {
        'question_pt': args.train_question_pt,
        'scene_pt': args.train_scene_pt,
        'vocab_json': args.vocab_json,
        'batch_size': args.batch_size,
        'ratio': args.ratio,
        'shuffle': True
    }
    val_loader_kwargs = {
        'question_pt': args.val_question_pt,
        'scene_pt': args.val_scene_pt,
        'vocab_json': args.vocab_json,
        'batch_size': args.batch_size,
        'shuffle': False
    }
    train_loader = ClevrDataLoader(**train_loader_kwargs)
    val_loader = ClevrDataLoader(**val_loader_kwargs)

    logging.info("Create model.........")
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model_kwargs = {k: v for k, v in vars(args).items() if k in
                    {'dim_v', 'dim_pre_v', 'num_edge_cat', 'num_class', 'num_attribute'}}
    model_kwargs_tosave = copy.deepcopy(model_kwargs)
    model_kwargs['vocab'] = train_loader.vocab
    model = XNMNet(**model_kwargs).to(device)
    logging.info(model)

    optimizer = optim.Adam(model.parameters(), args.lr, weight_decay=args.l2reg)
    # decay the learning rate by 10x after 1/ratio epochs
    scheduler = optim.lr_scheduler.MultiStepLR(
        optimizer=optimizer, milestones=[int(1 / args.ratio)], gamma=0.1)
    criterion = nn.CrossEntropyLoss().to(device)

    logging.info("Start training........")
    tic = time.time()
    iter_count = 0
    for epoch in range(args.num_epoch):
        for i, batch in enumerate(train_loader.generator()):
            iter_count += 1
            progress = epoch + i / len(train_loader)
            answers, questions, *batch_input = [todevice(x, device) for x in batch]
            logits, others = model(*batch_input)
            loss = criterion(logits, answers)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if (i + 1) % (len(train_loader) // 10) == 0:
                logging.info("Progress %.3f  loss = %.3f" % (progress, loss.item()))
        scheduler.step()
        # validate and checkpoint after every epoch
        if (epoch + 1) % 1 == 0:
            valid_acc = validate(model, val_loader, device)
            logging.info('\n ~~~~~~ Valid Accuracy: %.4f ~~~~~~~\n' % valid_acc)
            save_checkpoint(epoch, model, optimizer, model_kwargs_tosave,
                            os.path.join(args.save_dir, 'model.pt'))
            logging.info(' >>>>>> save to %s <<<<<<' % (args.save_dir))
def validate(model, data, device, detail=False):
    count, correct = 0, 0
    model.eval()
    details = {cat: [0, 0] for cat in
               {'count', 'compare number', 'exist', 'query', 'compare attribute'}}
    print('validate...')
    for batch in tqdm(data.generator(), total=len(data)):
        answers, questions, *batch_input = [todevice(x, device) for x in batch]
        logits, others = model(*batch_input)
        predicts = logits.max(1)[1]
        """
        There are some counting questions in CLEVR whose answer is a large number
        (such as 8 or 9). However, as the training instances of such questions are
        very few, the predictions of our softmax-based classifier cannot reach 100%
        accuracy on counting questions (we only reach up to 99.99%).
        Thanks to our attention mechanism over scene graphs, we can predict the
        answers of counting questions by directly summing up the node attention,
        instead of feeding hidden features into a classifier. This alternative
        strategy gives 100% counting accuracy.
        """
        # correct += torch.eq(predicts, answers).long().sum().item()
        count_outputs = others['count_outputs']
        for i in range(len(count_outputs)):
            if count_outputs[i] is None:
                correct += int(predicts[i].item() == answers[i].item())
            else:
                p = int(round(count_outputs[i].item()))
                a = int(data.vocab['answer_idx_to_token'][answers[i].item()])
                correct += int(p == a)
        count += answers.size(0)
        if detail:
            programs = batch_input[0]
            for i in range(len(answers)):
                for j in range(len(programs[i])):
                    program = data.vocab['program_idx_to_token'][programs[i][j].item()]
                    if program in ['<NULL>', '<START>', '<END>', '<UNK>', 'unique']:
                        continue
                    cat = map_program_to_cat[program]
                    if program == 'count':
                        p = int(round(count_outputs[i].item()))
                        a = int(data.vocab['answer_idx_to_token'][answers[i].item()])
                    else:
                        p = predicts[i].item()
                        a = answers[i].item()
                    details[cat][0] += int(p == a)
                    details[cat][1] += 1
                    break
    acc = correct / count
    if detail:
        details = {k: (v[0] / v[1]) for k, v in details.items()}
        return acc, details
    return acc
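# Hedged sketch (not part of the original codebase): a minimal, self-contained illustration of
# the counting strategy described in the docstring above. The attention values and the helper
# name `_count_from_node_attention_demo` are hypothetical; in the real code the summed value
# arrives precomputed in others['count_outputs'].
def _count_from_node_attention_demo():
    # Suppose the final module attends over 5 scene-graph nodes and 3 of them match the query.
    node_attention = torch.tensor([0.98, 0.95, 0.99, 0.03, 0.05])
    # Summing the attention mass and rounding yields the count directly, bypassing the
    # softmax classifier that struggles on rare large counts (e.g. 8 or 9).
    return int(round(node_attention.sum().item()))  # -> 3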
def test(model, data, device):
    model.eval()
    results = []
    for batch in tqdm(data, total=len(data)):
        coco_ids, answers, *batch_input = [todevice(x, device) for x in batch]
        logits, others = model(*batch_input)
        predicts = torch.max(logits, dim=1)[1]
        for predict in predicts:
            results.append(data.vocab['answer_idx_to_token'][predict.item()])
    return results
def validate(model, data, device):
    model.eval()
    print('validate...')
    total_acc, count = 0, 0
    for batch in tqdm(data, total=len(data)):
        coco_ids, answers, *batch_input = [todevice(x, device) for x in batch]
        logits, others = model(*batch_input)
        acc = batch_accuracy(logits, answers)
        total_acc += acc.sum().item()
        count += answers.size(0)
    acc = total_acc / count
    return acc
def validate(model, data, device, withLossFlag=False, func=None):
    print('validate...')
    model.eval()
    total_acc, count = 0, 0
    total_loss = 0
    for batch in tqdm(data, total=len(data)):
        coco_ids, answers, *batch_input = [todevice(x, device) for x in batch]
        logits, others = model(*batch_input)
        if withLossFlag:
            nll = -func.log_softmax(logits, dim=1)
            loss = (nll * answers / 10).sum(dim=1).mean()
            total_loss += loss.item()
        acc = batch_accuracy(logits, answers)
        total_acc += acc.sum().item()
        count += answers.size(0)
    acc = total_acc / count
    if withLossFlag:
        return acc, total_loss / len(data)
    return acc
def test_with_david_generated_program(model, data, device, pretrained_dir):
    program_generator = load_program_generator(
        os.path.join(pretrained_dir, 'program_generator.pt')).to(device)
    david_vocab = json.load(open(os.path.join(pretrained_dir, 'david_vocab.json')))
    david_vocab['program_idx_to_token'] = invert_dict(david_vocab['program_token_to_idx'])
    results = []
    model.eval()
    for batch in tqdm(data, total=len(data)):
        _, questions, gt_programs, gt_program_inputs, features, edge_vectors = \
            [todevice(x, device) for x in batch]
        programs, program_inputs = [], []
        # generate a program for each question using the pretrained generator
        for i in range(questions.size(0)):
            question_str = []
            for j in range(questions.size(1)):
                word = data.vocab['question_idx_to_token'][questions[i, j].item()]
                if word == '<START>':
                    continue
                if word == '<END>':
                    break
                question_str.append(word)
            question_str = ' '.join(question_str)  # question string
            david_program = generate_single_program(question_str, program_generator,
                                                    david_vocab, device)
            david_program = [david_vocab['program_idx_to_token'][idx.item()]
                             for idx in david_program.squeeze()]
            # convert the generated program to our format: returns two index lists
            program, program_input = convert_david_program_to_mine(david_program, data.vocab)
            programs.append(program)
            program_inputs.append(program_input)
        # pad all programs in the batch to the same length
        max_len = max(len(p) for p in programs)
        for i in range(len(programs)):
            while len(programs[i]) < max_len:
                programs[i].append(data.vocab['program_token_to_idx']['<NULL>'])
                program_inputs[i].append(data.vocab['question_token_to_idx']['<NULL>'])
        # to tensor
        programs = torch.LongTensor(programs).to(device)
        program_inputs = torch.LongTensor(program_inputs).to(device)
        logits = model(programs, program_inputs, features, edge_vectors)
        predicts = logits.max(1)[1]
        for predict in predicts:
            # note: questions must not be shuffled!
            results.append(data.vocab['answer_idx_to_token'][predict.item()])
    return results
def train():
    train_loader = VQADataLoader(**train_loader_kwargs)
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model_kwargs.update({'vocab': train_loader.vocab, 'device': device})
    val_loader = VQADataLoader(**val_loader_kwargs)
    model = XNMNet(**model_kwargs).to(device)
    train_loader.glove_matrix = torch.FloatTensor(train_loader.glove_matrix).to(device)
    with torch.no_grad():
        model.token_embedding.weight.set_(train_loader.glove_matrix)
    ################################################################
    parameters = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.Adam(parameters, lr, weight_decay=0)
    for epoch in range(num_epoch):
        model.train()
        for i, batch in enumerate(tqdm(train_loader, total=len(train_loader))):
            progress = epoch + i / len(train_loader)
            coco_ids, answers, *batch_input = [todevice(x, device) for x in batch]
            logits, others = model(*batch_input)
            ##################### loss #####################
            nll = -nn.functional.log_softmax(logits, dim=1)
            loss = (nll * answers / 10).sum(dim=1).mean()
            #################################################
            optimizer.zero_grad()
            loss.backward()
            nn.utils.clip_grad_value_(parameters, clip_value=0.5)
            optimizer.step()
            if (i + 1) % (len(train_loader) // 50) == 0:
                logging.info("Progress %.3f  ce_loss = %.3f" % (progress, loss.item()))
        train_acc, train_loss = validate(model, train_loader, device,
                                         withLossFlag=True, func=nn.functional)
        logging.info('\n ~~~~~~ Epoch: %d ~~~~~~~\n' % epoch)
        logging.info('\n ~~~~~~ Train Accuracy: %.4f ~~~~~~~\n' % train_acc)
        logging.info('\n ~~~~~~ Train Loss: %.4f ~~~~~~~\n' % train_loss)
        valid_acc, valid_loss = validate(model, val_loader, device,
                                         withLossFlag=True, func=nn.functional)
        logging.info('\n ~~~~~~ Valid Accuracy: %.4f ~~~~~~~\n' % valid_acc)
        logging.info('\n ~~~~~~ Valid Loss: %.4f ~~~~~~~\n' % valid_loss)
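# Hedged sketch (assumption, not from the original code): illustrates the soft-target loss used
# in train() above, assuming each row of `answers` holds per-candidate annotator counts out of
# 10, so `answers / 10` acts as a soft target weighting. The helper name and all tensor values
# below are hypothetical.
def _soft_target_loss_demo():
    logits = torch.tensor([[2.0, 0.5, -1.0]])        # scores for 3 hypothetical candidate answers
    answer_counts = torch.tensor([[8.0, 2.0, 0.0]])  # hypothetical annotator votes (out of 10)
    nll = -nn.functional.log_softmax(logits, dim=1)  # negative log-likelihood per candidate
    # Weight each candidate's NLL by its normalized vote count, then average over the batch.
    return (nll * answer_counts / 10).sum(dim=1).mean()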
def validate_with_david_generated_program(model, data, device, pretrained_dir):
    program_generator = load_program_generator(
        os.path.join(pretrained_dir, 'program_generator.pt')).to(device)
    david_vocab = json.load(open(os.path.join(pretrained_dir, 'david_vocab.json')))
    david_vocab['program_idx_to_token'] = invert_dict(david_vocab['program_token_to_idx'])
    details = {cat: [0, 0] for cat in
               {'count', 'compare number', 'exist', 'query', 'compare attribute'}}
    count, correct = 0, 0
    model.eval()
    print('validate...')
    for batch in tqdm(data.generator(), total=len(data)):
        answers, questions, gt_programs, gt_program_inputs, *batch_input = \
            [todevice(x, device) for x in batch]
        programs, program_inputs = [], []
        # generate a program for each question using the pretrained generator
        for i in range(questions.size(0)):
            question_str = []
            for j in range(questions.size(1)):
                word = data.vocab['question_idx_to_token'][questions[i, j].item()]
                if word == '<START>':
                    continue
                if word == '<END>':
                    break
                question_str.append(word)
            question_str = ' '.join(question_str)  # question string
            david_program = generate_single_program(question_str, program_generator,
                                                    david_vocab, device)
            david_program = [david_vocab['program_idx_to_token'][idx.item()]
                             for idx in david_program.squeeze()]
            # convert the generated program to our format: returns two index lists
            program, program_input = convert_david_program_to_mine(david_program, data.vocab)
            programs.append(program)
            program_inputs.append(program_input)
        # pad all programs in the batch to the same length
        max_len = max(len(p) for p in programs)
        for i in range(len(programs)):
            while len(programs[i]) < max_len:
                programs[i].append(data.vocab['program_token_to_idx']['<NULL>'])
                program_inputs[i].append(data.vocab['question_token_to_idx']['<NULL>'])
        # to tensor
        programs = torch.LongTensor(programs).to(device)
        program_inputs = torch.LongTensor(program_inputs).to(device)
        logits, others = model(programs, program_inputs, *batch_input)
        predicts = logits.max(1)[1]
        correct += torch.eq(predicts, answers).long().sum().item()
        count += answers.size(0)
        # per-category details based on the ground-truth programs
        for i in range(len(answers)):
            for j in range(len(gt_programs[i])):
                program = data.vocab['program_idx_to_token'][gt_programs[i][j].item()]
                if program in ['<NULL>', '<START>', '<END>', '<UNK>', 'unique']:
                    continue
                cat = map_program_to_cat[program]
                details[cat][0] += int(predicts[i].item() == answers[i].item())
                details[cat][1] += 1
                break
    acc = correct / count
    details = {k: (v[0] / v[1]) for k, v in details.items()}
    return acc, details
def train(args):
    logging.info("Create train_loader and val_loader.........")
    train_loader_kwargs = {
        'question_pt': args.train_question_pt,
        'vocab_json': args.vocab_json,
        'feature_h5': args.feature_h5,
        'batch_size': args.batch_size,
        'spatial': args.spatial,
        'num_workers': 2,
        'shuffle': True
    }
    train_loader = VQADataLoader(**train_loader_kwargs)
    if args.val:
        val_loader_kwargs = {
            'question_pt': args.val_question_pt,
            'vocab_json': args.vocab_json,
            'feature_h5': args.feature_h5,
            'batch_size': args.batch_size,
            'spatial': args.spatial,
            'num_workers': 2,
            'shuffle': False
        }
        val_loader = VQADataLoader(**val_loader_kwargs)

    logging.info("Create model.........")
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model_kwargs = {
        'vocab': train_loader.vocab,
        'dim_v': args.dim_v,
        'dim_word': args.dim_word,
        'dim_hidden': args.dim_hidden,
        'dim_vision': args.dim_vision,
        'dim_edge': args.dim_edge,
        'cls_fc_dim': args.cls_fc_dim,
        'dropout_prob': args.dropout,
        'T_ctrl': args.T_ctrl,
        'glimpses': args.glimpses,
        'stack_len': args.stack_len,
        'device': device,
        'spatial': args.spatial,
        'use_gumbel': args.module_prob_use_gumbel == 1,
        'use_validity': args.module_prob_use_validity == 1,
    }
    model_kwargs_tosave = {k: v for k, v in model_kwargs.items() if k != 'vocab'}
    model = XNMNet(**model_kwargs).to(device)
    logging.info(model)
    logging.info('load glove vectors')
    train_loader.glove_matrix = torch.FloatTensor(train_loader.glove_matrix).to(device)
    model.token_embedding.weight.data.set_(train_loader.glove_matrix)
    ################################################################
    parameters = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.Adam(parameters, args.lr, weight_decay=0)

    start_epoch = 0
    if args.restore:
        print("Restore checkpoint and optimizer...")
        ckpt = os.path.join(args.save_dir, 'model.pt')
        ckpt = torch.load(ckpt, map_location={'cuda:0': 'cpu'})
        start_epoch = ckpt['epoch'] + 1
        model.load_state_dict(ckpt['state_dict'])
        optimizer.load_state_dict(ckpt['optimizer'])
    scheduler = optim.lr_scheduler.ExponentialLR(optimizer, 0.5 ** (1 / args.lr_halflife))

    logging.info("Start training........")
    for epoch in range(start_epoch, args.num_epoch):
        model.train()
        for i, batch in enumerate(train_loader):
            progress = epoch + i / len(train_loader)
            coco_ids, answers, *batch_input = [todevice(x, device) for x in batch]
            logits, others = model(*batch_input)
            ##################### loss #####################
            nll = -nn.functional.log_softmax(logits, dim=1)
            loss = (nll * answers / 10).sum(dim=1).mean()
            #################################################
            scheduler.step()
            optimizer.zero_grad()
            loss.backward()
            nn.utils.clip_grad_value_(parameters, clip_value=0.5)
            optimizer.step()
            if (i + 1) % (len(train_loader) // 50) == 0:
                logging.info("Progress %.3f  ce_loss = %.3f" % (progress, loss.item()))
        save_checkpoint(epoch, model, optimizer, model_kwargs_tosave,
                        os.path.join(args.save_dir, 'model.pt'))
        logging.info(' >>>>>> save to %s <<<<<<' % (args.save_dir))
        if args.val:
            valid_acc = validate(model, val_loader, device)
            logging.info('\n ~~~~~~ Valid Accuracy: %.4f ~~~~~~~\n' % valid_acc)
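# Hedged sketch (assumption): a worked check of the learning-rate schedule in train() above.
# ExponentialLR multiplies the learning rate by gamma = 0.5 ** (1 / lr_halflife) at each
# scheduler.step(), which is called once per iteration, so after lr_halflife iterations the
# learning rate has been scaled by gamma ** lr_halflife == 0.5, i.e. halved.
# The helper name and default values below are illustrative only.
def _lr_halflife_demo(lr=1e-3, lr_halflife=50000):
    gamma = 0.5 ** (1 / lr_halflife)
    return lr * gamma ** lr_halflife  # == lr * 0.5 (up to floating-point error)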