def main(rank, args):
    # Setup dataset and data loaders
    dataset = MoleculeDataset('ChEMBL', 'canonical', ['train', 'val'])
    train_loader = DataLoader(dataset.train_set, batch_size=args['batch_size'],
                              shuffle=True, collate_fn=dataset.collate)
    val_loader = DataLoader(dataset.val_set, batch_size=args['batch_size'],
                            shuffle=True, collate_fn=dataset.collate)

    model = SGVAE(atom_types=dataset.atom_types,
                  bond_types=dataset.bond_types,
                  node_hidden_size=args['node_hidden_size'],
                  num_prop_rounds=args['num_propagation_rounds'],
                  dropout=args['dropout'])

    if args['num_processes'] == 1:
        from utils import Optimizer
        optimizer = Optimizer(args['lr'], Adam(model.parameters(), lr=args['lr']))
    else:
        from utils import MultiProcessOptimizer
        optimizer = MultiProcessOptimizer(args['num_processes'], args['lr'],
                                          Adam(model.parameters(), lr=args['lr']))

    if rank == 0:
        t2 = time.time()
    best_val_prob = 0

    # Training
    for epoch in range(args['nepochs']):
        model.train()
        if rank == 0:
            print('Training')

        for i, data in enumerate(train_loader):
            log_prob = model(actions=data, compute_log_prob=True)
            prob = log_prob.detach().exp()

            loss_averaged = -log_prob
            prob_averaged = prob

            optimizer.backward_and_step(loss_averaged)
            if rank == 0:
                train_printer.update(epoch + 1, loss_averaged.item(),
                                     prob_averaged.item())
def train(self, model, data_loader, batch_size, n_epoch, template_flag,
          resume=False, optimizer=None, mode=0, teacher_forcing_ratio=0,
          post_flag=False):
    self.evaluator = Evaluator(vocab_dict=self.vocab_dict,
                               vocab_list=self.vocab_list,
                               decode_classes_dict=self.decode_classes_dict,
                               decode_classes_list=self.decode_classes_list,
                               loss=NLLLoss(),
                               cuda_use=self.cuda_use)

    if resume:
        checkpoint_path = Checkpoint.get_certain_checkpoint("./experiment", "best")
        resume_checkpoint = Checkpoint.load(checkpoint_path)
        model = resume_checkpoint.model
        self.optimizer = resume_checkpoint.optimizer

        # Rebuild the wrapped optimizer so that it tracks the parameters of the
        # restored model rather than those of the model it was created with.
        resume_optim = self.optimizer.optimizer
        defaults = resume_optim.param_groups[0]
        defaults.pop('params', None)
        self.optimizer.optimizer = resume_optim.__class__(model.parameters(),
                                                          **defaults)

        start_epoch = resume_checkpoint.epoch
        start_step = resume_checkpoint.step
        self.train_acc_list = resume_checkpoint.train_acc_list
        self.test_acc_list = resume_checkpoint.test_acc_list
        self.loss_list = resume_checkpoint.loss_list
    else:
        start_epoch = 1
        start_step = 0
        self.train_acc_list = []
        self.test_acc_list = []
        self.loss_list = []

        model_opt = NoamOpt(512, 1, 2000,
                            torch.optim.Adam(model.parameters(), lr=0,
                                             betas=(0.9, 0.98), eps=1e-9))
        if optimizer is None:
            optimizer = Optimizer(optim.Adam(model.parameters()), max_grad_norm=0)
        # Note: the Noam schedule wrapper takes precedence over the optimizer
        # argument above.
        self.optimizer = model_opt

    self._train_epoches(data_loader=data_loader, model=model,
                        batch_size=batch_size, start_epoch=start_epoch,
                        start_step=start_step, n_epoch=n_epoch, mode=mode,
                        template_flag=template_flag,
                        teacher_forcing_ratio=teacher_forcing_ratio,
                        post_flag=post_flag)
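The call NoamOpt(512, 1, 2000, Adam(...)) above matches the widely used "Noam" warmup schedule from the Transformer literature, with arguments (model_size, factor, warmup, optimizer). Below is a minimal sketch of a wrapper with that interface, stated as an assumption; the project's own NoamOpt implementation may differ.

# Minimal sketch of a Noam-style learning-rate wrapper, assuming the
# NoamOpt(model_size, factor, warmup, optimizer) interface used above.
class NoamOpt:
    def __init__(self, model_size, factor, warmup, optimizer):
        self.model_size = model_size
        self.factor = factor
        self.warmup = warmup
        self.optimizer = optimizer
        self._step = 0

    def rate(self, step=None):
        """lr = factor * d_model^-0.5 * min(step^-0.5, step * warmup^-1.5)"""
        step = step or self._step
        return self.factor * (self.model_size ** -0.5 *
                              min(step ** -0.5, step * self.warmup ** -1.5))

    def step(self):
        """Set the scheduled learning rate on every parameter group, then step."""
        self._step += 1
        lr = self.rate()
        for group in self.optimizer.param_groups:
            group['lr'] = lr
        self.optimizer.step()

    def zero_grad(self):
        self.optimizer.zero_grad()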
def get_model():
    net = Net(config)
    net = net.cuda()

    loss = Loss(config).cuda()
    post_process = PostProcess(config).cuda()

    params = net.parameters()
    opt = Optimizer(params, config)

    return config, ArgoDataset, collate_fn, net, loss, post_process, opt
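get_model() bundles the configuration, dataset class, collate function, network, loss, post-processing module and optimizer into one tuple. The sketch below shows one way a caller might consume it; the Dataset constructor arguments, the dict returned by the loss, and the opt.zero_grad()/opt.step() interface are assumptions for illustration, not taken from the snippet.

# Hedged sketch of consuming the tuple returned by get_model(); the names
# split_path, train=True and loss_out['loss'] are hypothetical.
from torch.utils.data import DataLoader

def train_one_epoch(split_path):
    config, Dataset, collate, net, loss_fn, post_process, opt = get_model()
    loader = DataLoader(Dataset(split_path, config, train=True),
                        batch_size=config['batch_size'],
                        collate_fn=collate, shuffle=True)
    net.train()
    for data in loader:
        out = net(data)
        loss_out = loss_fn(out, data)  # assumed to return a dict containing 'loss'
        opt.zero_grad()
        loss_out['loss'].backward()
        opt.step()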
def Loading(self):
    self.device.set_device(self.arg.device)
    print("Loading model")
    if self.arg.model:
        model_class = import_class(self.arg.model)
        model = self.device.model_to_device(model_class(**self.arg.model_args))
        if self.arg.weights:
            try:
                print("Loading pretrained model...")
                state_dict = torch.load(self.arg.weights)
                for w in self.arg.ignore_weights:
                    if state_dict.pop(w, None) is not None:
                        print('Successfully removed weights: {}.'.format(w))
                    else:
                        print('Could not remove weights: {}.'.format(w))
                model.load_state_dict(state_dict, strict=True)
                optimizer = Optimizer(model, self.arg.optimizer_args)
            except RuntimeError:
                print("Loading from checkpoint...")
                state_dict = torch.load(self.arg.weights)
                self.rng.set_rng_state(state_dict['rng_state'])
                self.arg.optimizer_args['start_epoch'] = state_dict["epoch"] + 1
                self.recoder.print_log(
                    "Resuming from checkpoint: epoch {}".format(
                        self.arg.optimizer_args['start_epoch']))
                model = self.device.load_weights(model, self.arg.weights,
                                                 self.arg.ignore_weights)
                optimizer = Optimizer(model, self.arg.optimizer_args)
                optimizer.optimizer.load_state_dict(
                    state_dict["optimizer_state_dict"])
                optimizer.scheduler.load_state_dict(
                    state_dict["scheduler_state_dict"])
        else:
            optimizer = Optimizer(model, self.arg.optimizer_args)
    else:
        raise ValueError("No Models.")
    print("Loading model finished.")
    self.load_data()
    return model, optimizer
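Loading() constructs an Optimizer from the model and self.arg.optimizer_args and later restores optimizer.optimizer and optimizer.scheduler from saved state dicts. The sketch below assumes a wrapper with that shape, built on SGD plus a MultiStepLR schedule; the argument names ('base_lr', 'step', 'nesterov', 'weight_decay') are illustrative assumptions.

# Hedged sketch of an Optimizer wrapper exposing .optimizer and .scheduler,
# matching how Loading() uses it. Not the project's actual implementation.
import torch


class Optimizer:
    def __init__(self, model, optimizer_args):
        self.optimizer = torch.optim.SGD(
            model.parameters(),
            lr=optimizer_args.get('base_lr', 0.01),
            momentum=0.9,
            nesterov=optimizer_args.get('nesterov', False),
            weight_decay=optimizer_args.get('weight_decay', 0.0))
        self.scheduler = torch.optim.lr_scheduler.MultiStepLR(
            self.optimizer,
            milestones=optimizer_args.get('step', [20, 40]),
            gamma=0.1)

    def step(self):
        self.optimizer.step()

    def zero_grad(self):
        self.optimizer.zero_grad()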
def main(args, path_to_candidate_bonds):
    if args['train_path'] is None:
        train_set = USPTORank(
            subset='train',
            candidate_bond_path=path_to_candidate_bonds['train'],
            max_num_change_combos_per_reaction=args['max_num_change_combos_per_reaction_train'],
            num_processes=args['num_processes'])
    else:
        train_set = WLNRankDataset(
            path_to_reaction_file=args['train_path'],
            candidate_bond_path=path_to_candidate_bonds['train'],
            mode='train',
            max_num_change_combos_per_reaction=args['max_num_change_combos_per_reaction_train'],
            num_processes=args['num_processes'])
    train_set.ignore_large()

    if args['val_path'] is None:
        val_set = USPTORank(
            subset='val',
            candidate_bond_path=path_to_candidate_bonds['val'],
            max_num_change_combos_per_reaction=args['max_num_change_combos_per_reaction_eval'],
            num_processes=args['num_processes'])
    else:
        val_set = WLNRankDataset(
            path_to_reaction_file=args['val_path'],
            candidate_bond_path=path_to_candidate_bonds['val'],
            mode='val',
            max_num_change_combos_per_reaction=args['max_num_change_combos_per_reaction_eval'],
            num_processes=args['num_processes'])

    if args['num_workers'] > 1:
        torch.multiprocessing.set_sharing_strategy('file_system')

    train_loader = DataLoader(train_set, batch_size=args['batch_size'],
                              collate_fn=collate_rank_train, shuffle=True,
                              num_workers=args['num_workers'])
    val_loader = DataLoader(val_set, batch_size=args['batch_size'],
                            collate_fn=collate_rank_eval, shuffle=False,
                            num_workers=args['num_workers'])

    model = WLNReactionRanking(
        node_in_feats=args['node_in_feats'],
        edge_in_feats=args['edge_in_feats'],
        node_hidden_feats=args['hidden_size'],
        num_encode_gnn_layers=args['num_encode_gnn_layers']).to(args['device'])
    criterion = CrossEntropyLoss(reduction='sum')
    optimizer = Adam(model.parameters(), lr=args['lr'])
    from utils import Optimizer
    optimizer = Optimizer(model, args['lr'], optimizer,
                          max_grad_norm=args['max_norm'])

    acc_sum = 0
    grad_norm_sum = 0
    dur = []
    total_samples = 0

    for epoch in range(args['num_epochs']):
        t0 = time.time()
        model.train()
        for batch_id, batch_data in enumerate(train_loader):
            batch_reactant_graphs, batch_product_graphs, batch_combo_scores, \
                batch_labels, batch_num_candidate_products = batch_data

            batch_reactant_graphs = batch_reactant_graphs.to(args['device'])
            batch_product_graphs = batch_product_graphs.to(args['device'])
            batch_combo_scores = batch_combo_scores.to(args['device'])
            batch_labels = batch_labels.to(args['device'])
            reactant_node_feats = batch_reactant_graphs.ndata.pop('hv').to(args['device'])
            reactant_edge_feats = batch_reactant_graphs.edata.pop('he').to(args['device'])
            product_node_feats = batch_product_graphs.ndata.pop('hv').to(args['device'])
            product_edge_feats = batch_product_graphs.edata.pop('he').to(args['device'])

            pred = model(
                reactant_graph=batch_reactant_graphs,
                reactant_node_feats=reactant_node_feats,
                reactant_edge_feats=reactant_edge_feats,
                product_graphs=batch_product_graphs,
                product_node_feats=product_node_feats,
                product_edge_feats=product_edge_feats,
                candidate_scores=batch_combo_scores,
                batch_num_candidate_products=batch_num_candidate_products)

            # Check if the ground truth candidate has the highest score
            batch_loss = 0
            product_graph_start = 0
            for i in range(len(batch_num_candidate_products)):
                product_graph_end = product_graph_start + batch_num_candidate_products[i]
                reaction_pred = pred[product_graph_start:product_graph_end, :]
                acc_sum += float(
                    reaction_pred.max(dim=0)[1].detach().cpu().data.item() == 0)
                batch_loss += criterion(reaction_pred.reshape(1, -1),
                                        batch_labels[i, :])
                product_graph_start = product_graph_end

            grad_norm_sum += optimizer.backward_and_step(batch_loss)
            total_samples += args['batch_size']

            if total_samples % args['print_every'] == 0:
                progress = 'Epoch {:d}/{:d}, iter {:d}/{:d} | time {:.4f} | ' \
                           'accuracy {:.4f} | grad norm {:.4f}'.format(
                    epoch + 1, args['num_epochs'],
                    (batch_id + 1) * args['batch_size'] // args['print_every'],
                    len(train_set) // args['print_every'],
                    (sum(dur) + time.time() - t0) / total_samples * args['print_every'],
                    acc_sum / args['print_every'],
                    grad_norm_sum / args['print_every'])
                print(progress)
                acc_sum = 0
                grad_norm_sum = 0

            if total_samples % args['decay_every'] == 0:
                dur.append(time.time() - t0)
                old_lr = optimizer.lr
                optimizer.decay_lr(args['lr_decay_factor'])
                new_lr = optimizer.lr
                print('Learning rate decayed from {:.4f} to {:.4f}'.format(
                    old_lr, new_lr))
                torch.save({'model_state_dict': model.state_dict()},
                           args['result_path'] + '/model_{:d}.pkl'.format(total_samples))
                prediction_summary = \
                    'total samples {:d}, (epoch {:d}/{:d}, iter {:d}/{:d})\n'.format(
                        total_samples, epoch + 1, args['num_epochs'],
                        (batch_id + 1) * args['batch_size'] // args['print_every'],
                        len(train_set) // args['print_every']) + \
                    candidate_ranking_eval(args, model, val_loader)
                print(prediction_summary)
                with open(args['result_path'] + '/val_eval.txt', 'a') as f:
                    f.write(prediction_summary)
                t0 = time.time()
                model.train()
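Both this ranking script and the reaction-center script further below build Optimizer(model, lr, optimizer, max_grad_norm=...) from a local utils module and rely on backward_and_step(), decay_lr() and an lr attribute. The sketch below is a minimal wrapper under those assumptions; the real utils module is not shown here, and the DGMG-style snippets use a different Optimizer(lr, optimizer) signature, so treat this purely as an illustration.

# Hedged sketch of the utils.Optimizer interface assumed above.
import torch
from torch.nn.utils import clip_grad_norm_


class Optimizer:
    """Wraps a torch optimizer with gradient clipping and manual LR decay."""

    def __init__(self, model, lr, optimizer, max_grad_norm=None):
        self.model = model
        self.lr = lr
        self.optimizer = optimizer
        self.max_grad_norm = max_grad_norm

    def backward_and_step(self, loss):
        """Backprop, optionally clip gradients, take a step, return the grad norm."""
        self.optimizer.zero_grad()
        loss.backward()
        if self.max_grad_norm is not None:
            grad_norm = clip_grad_norm_(self.model.parameters(), self.max_grad_norm)
        else:
            # clip to infinity just to compute the total norm
            grad_norm = clip_grad_norm_(self.model.parameters(), float('inf'))
        self.optimizer.step()
        return float(grad_norm)

    def decay_lr(self, decay_factor):
        """Multiply the learning rate of every parameter group by decay_factor."""
        self.lr *= decay_factor
        for group in self.optimizer.param_groups:
            group['lr'] = self.lr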
def main(args):
    ts = datetime.datetime.now().timestamp()
    logger = SummaryWriter(
        os.path.join('exp/qgen_rl/', '{}_{}'.format(args.exp_name, ts)))
    logger.add_text('exp_name', args.exp_name)
    logger.add_text('args', str(args))

    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(args.seed)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    vocab = Vocab(os.path.join(args.data_dir, 'vocab.csv'), 3)
    category_vocab = CategoryVocab(os.path.join(args.data_dir, 'categories.csv'))

    data_loader = OrderedDict()
    splits = ['train', 'valid'] + (['test'] if args.test_set else list())
    for split in splits:
        file = os.path.join(args.data_dir, 'guesswhat.' + split + '.jsonl.gz')
        data_loader[split] = DataLoader(
            dataset=InferenceDataset(split, file, vocab, category_vocab,
                                     new_object=split == 'train',
                                     load_vgg_features=True),
            batch_size=args.batch_size,
            collate_fn=InferenceDataset.get_collate_fn(device),
            shuffle=split == 'train')

    if not args.belief_state:
        qgen = QGen.load(device, file=args.qgen_file)
    else:
        qgen = QGenBelief.load(device, file=args.qgen_file)
    guesser = Guesser.load(device, file=args.guesser_file)
    oracle = Oracle.load(device, file=args.oracle_file)
    generation_wrapper = GenerationWrapper(qgen, guesser, oracle)

    baseline = MLP(sizes=[qgen.hidden_size, args.baseline_hidden_size, 1],
                   activation='relu', final_activation='relu',
                   bias=[True, False]).to(device)
    baseline_loss_fn = torch.nn.MSELoss(reduction='sum')

    baseline_optimizer = Optimizer(torch.optim.SGD, baseline.parameters(),
                                   lr=args.baseline_lr)
    qgen_optimizer = Optimizer(torch.optim.SGD, qgen.parameters(),
                               lr=args.qgen_lr)

    split2strat = {'train': args.train_strategy,
                   'valid': args.eval_strategy,
                   'test': args.eval_strategy}

    best_val_acc = 0
    for epoch in range(args.epochs):
        for split in splits:
            if split == 'train':
                qgen.train()
                baseline.train()
            else:
                qgen.eval()
                baseline.eval()
            # Calling torch.enable_grad()/torch.no_grad() as bare statements has
            # no effect; set the grad mode explicitly instead.
            torch.set_grad_enabled(split == 'train')

            total_acc = list()
            for iteration, sample in enumerate(data_loader[split]):
                return_dict = generation_wrapper.generate(
                    sample, vocab, split2strat[split], args.max_num_questions,
                    device, args.belief_state,
                    return_keys=['mask', 'object_logits', 'hidden_states',
                                 'log_probs', 'generations'])

                mask = return_dict['mask']
                object_logits = return_dict['object_logits']
                hidden_states = return_dict['hidden_states']
                log_probs = return_dict['log_probs']

                acc = accuarcy(object_logits, sample['target_id'])
                total_acc += [acc]

                mask = mask.float()
                rewards = torch.eq(object_logits.topk(1)[1].view(-1),
                                   sample['target_id'].view(-1)).float()
                rewards = rewards.unsqueeze(1).repeat(1, mask.size(1))
                rewards *= mask

                baseline_preds = baseline(hidden_states.detach_()).squeeze(2)
                baseline_preds *= mask
                baseline_loss = baseline_loss_fn(baseline_preds.view(-1),
                                                 rewards.view(-1)) \
                    / baseline_preds.size(0)

                log_probs *= mask
                baseline_preds = baseline_preds.detach()
                policy_gradient_loss = torch.sum(
                    log_probs * (rewards - baseline_preds), dim=1)
                policy_gradient_loss = -torch.mean(policy_gradient_loss)

                if split == 'train':
                    qgen_optimizer.optimize(policy_gradient_loss,
                                            clip_norm_args=[args.clip_value])
                    baseline_optimizer.optimize(baseline_loss,
                                                clip_norm_args=[args.clip_value])

                logger.add_scalar('{}_accuracy'.format(split), acc,
                                  iteration + len(data_loader[split]) * epoch)
                logger.add_scalar('{}_reward'.format(split),
                                  torch.mean(rewards).item(),
                                  iteration + len(data_loader[split]) * epoch)
                logger.add_scalar('{}_bl_loss'.format(split),
                                  baseline_loss.item(),
                                  iteration + len(data_loader[split]) * epoch)
                logger.add_scalar('{}_pg_loss'.format(split),
                                  policy_gradient_loss.item(),
                                  iteration + len(data_loader[split]) * epoch)

            model_saved = False
            if split == 'valid':
                if np.mean(total_acc) > best_val_acc:
                    best_val_acc = np.mean(total_acc)
                    qgen.save(file='bin/qgen_rl_{}_{}.pt'.format(args.exp_name, ts),
                              accuarcy=np.mean(total_acc))
                    model_saved = True

            logger.add_scalar('epoch_{}_accuracy'.format(split),
                              np.mean(total_acc), epoch)
            print("Epoch {:3d}: {} Accuracy {:5.3f} {}".format(
                epoch, split.upper(), np.mean(total_acc) * 100,
                '*' if model_saved else ''))
            print("-" * 50)
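Here the wrapper is constructed as Optimizer(torch.optim.SGD, params, lr=...) and driven through optimize(loss, clip_norm_args=[clip_value]), a different interface from the utils.Optimizer sketch above. A minimal sketch consistent with this usage, stated as an assumption:

# Hedged sketch of the Optimizer interface assumed by the RL snippet above.
import torch


class Optimizer:
    def __init__(self, optimizer_cls, parameters, **optim_kwargs):
        self.parameters = list(parameters)
        self.optimizer = optimizer_cls(self.parameters, **optim_kwargs)

    def optimize(self, loss, clip_norm_args=None):
        """Backprop the loss, optionally clip the gradient norm, and step."""
        self.optimizer.zero_grad()
        loss.backward()
        if clip_norm_args is not None:
            torch.nn.utils.clip_grad_norm_(self.parameters, *clip_norm_args)
        self.optimizer.step()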
def main(rank, args):
    """
    Parameters
    ----------
    rank : int
        Subprocess id
    args : dict
        Configuration
    """
    if rank == 0:
        t1 = time.time()

    set_random_seed(args['seed'])
    # Removing the line below will cause problems for multiprocess training.
    torch.set_num_threads(1)

    # Setup dataset and data loaders
    dataset = MoleculeDataset(args['dataset'], args['order'], ['train', 'val'],
                              subset_id=rank, n_subsets=args['num_processes'])
    # Note that currently the batch size for the loaders should only be 1.
    train_loader = DataLoader(dataset.train_set, batch_size=args['batch_size'],
                              shuffle=True, collate_fn=dataset.collate)
    val_loader = DataLoader(dataset.val_set, batch_size=args['batch_size'],
                            shuffle=True, collate_fn=dataset.collate)

    if rank == 0:
        try:
            from tensorboardX import SummaryWriter
            writer = SummaryWriter(args['log_dir'])
        except ImportError:
            print('If you want to use tensorboard, install tensorboardX with pip.')
            writer = None
        train_printer = Printer(args['nepochs'], len(dataset.train_set),
                                args['batch_size'], writer)
        val_printer = Printer(args['nepochs'], len(dataset.val_set),
                              args['batch_size'])
    else:
        val_printer = None

    # Initialize model
    model = DGMG(atom_types=dataset.atom_types,
                 bond_types=dataset.bond_types,
                 node_hidden_size=args['node_hidden_size'],
                 num_prop_rounds=args['num_propagation_rounds'],
                 dropout=args['dropout'])

    if args['num_processes'] == 1:
        from utils import Optimizer
        optimizer = Optimizer(args['lr'], Adam(model.parameters(), lr=args['lr']))
    else:
        from utils import MultiProcessOptimizer
        optimizer = MultiProcessOptimizer(args['num_processes'], args['lr'],
                                          Adam(model.parameters(), lr=args['lr']))

    if rank == 0:
        t2 = time.time()
    best_val_prob = 0

    # Training
    for epoch in range(args['nepochs']):
        model.train()
        if rank == 0:
            print('Training')

        for i, data in enumerate(train_loader):
            log_prob = model(actions=data, compute_log_prob=True)
            prob = log_prob.detach().exp()

            loss_averaged = -log_prob
            prob_averaged = prob

            optimizer.backward_and_step(loss_averaged)
            if rank == 0:
                train_printer.update(epoch + 1, loss_averaged.item(),
                                     prob_averaged.item())

        synchronize(args['num_processes'])

        # Validation
        val_log_prob = evaluate(epoch, model, val_loader, val_printer)
        if args['num_processes'] > 1:
            dist.all_reduce(val_log_prob, op=dist.ReduceOp.SUM)
            val_log_prob /= args['num_processes']

        # Strictly speaking, the computation of probability here differs from what
        # is performed on the training set: we first average the log likelihood and
        # then exponentiate. By Jensen's inequality, the result is a lower bound of
        # the real probability.
        val_prob = (-val_log_prob).exp().item()
        val_log_prob = val_log_prob.item()

        if val_prob >= best_val_prob:
            if rank == 0:
                torch.save({'model_state_dict': model.state_dict()},
                           args['checkpoint_dir'])
                print('Old val prob {:.10f} | new val prob {:.10f} | model saved'.format(
                    best_val_prob, val_prob))
            best_val_prob = val_prob
        elif epoch >= args['warmup_epochs']:
            optimizer.decay_lr()

        if rank == 0:
            print('Validation')
            if writer is not None:
                writer.add_scalar('validation_log_prob', val_log_prob, epoch)
                writer.add_scalar('validation_prob', val_prob, epoch)
                writer.add_scalar('lr', optimizer.lr, epoch)
            print('Validation log prob {:.4f} | prob {:.10f}'.format(
                val_log_prob, val_prob))

        synchronize(args['num_processes'])

    if rank == 0:
        t3 = time.time()
        print('It took {} to setup.'.format(datetime.timedelta(seconds=t2 - t1)))
        print('It took {} to finish training.'.format(
            datetime.timedelta(seconds=t3 - t2)))
        print('--------------------------------------------------------------------------')
        print('On average, an epoch takes {}.'.format(
            datetime.timedelta(seconds=(t3 - t2) / args['nepochs'])))
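main(rank, args) is written to run once per process (subset_id=rank, MultiProcessOptimizer, synchronize), so it is typically launched with torch.multiprocessing. The sketch below shows one such launcher; the process-group backend, address and port are illustrative assumptions, not taken from the snippet.

# Hedged sketch of launching the per-rank main() above with torch.multiprocessing.
import torch.distributed as dist
import torch.multiprocessing as mp


def launch(rank, args):
    if args['num_processes'] > 1:
        dist.init_process_group(
            backend='gloo',
            init_method=args.get('master_ip', 'tcp://127.0.0.1:12345'),  # assumption
            world_size=args['num_processes'],
            rank=rank)
    main(rank, args)


if __name__ == '__main__':
    args = load_config()  # hypothetical helper returning the config dict main() expects
    if args['num_processes'] == 1:
        launch(0, args)
    else:
        mp.spawn(launch, args=(args,), nprocs=args['num_processes'])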
def main(rank, dev_id, args):
    set_seed()
    # Removing the line below will cause problems for multiprocess training.
    if args['num_devices'] > 1:
        torch.set_num_threads(1)

    if dev_id == -1:
        args['device'] = torch.device('cpu')
    else:
        args['device'] = torch.device('cuda:{}'.format(dev_id))
        # Set current device
        torch.cuda.set_device(args['device'])

    train_set, val_set = load_dataset(args)
    get_center_subset(train_set, rank, args['num_devices'])
    train_loader = DataLoader(train_set, batch_size=args['batch_size'],
                              collate_fn=collate_center, shuffle=True)
    val_loader = DataLoader(val_set, batch_size=args['batch_size'],
                            collate_fn=collate_center, shuffle=False)

    model = WLNReactionCenter(node_in_feats=args['node_in_feats'],
                              edge_in_feats=args['edge_in_feats'],
                              node_pair_in_feats=args['node_pair_in_feats'],
                              node_out_feats=args['node_out_feats'],
                              n_layers=args['n_layers'],
                              n_tasks=args['n_tasks']).to(args['device'])
    model.train()
    if rank == 0:
        print('# trainable parameters in the model: ', count_parameters(model))

    criterion = BCEWithLogitsLoss(reduction='sum')
    optimizer = Adam(model.parameters(), lr=args['lr'])
    if args['num_devices'] <= 1:
        from utils import Optimizer
        optimizer = Optimizer(model, args['lr'], optimizer,
                              max_grad_norm=args['max_norm'])
    else:
        from utils import MultiProcessOptimizer
        optimizer = MultiProcessOptimizer(args['num_devices'], model, args['lr'],
                                          optimizer, max_grad_norm=args['max_norm'])

    total_iter = 0
    rank_iter = 0
    grad_norm_sum = 0
    loss_sum = 0
    dur = []

    for epoch in range(args['num_epochs']):
        t0 = time.time()
        for batch_id, batch_data in enumerate(train_loader):
            total_iter += args['num_devices']
            rank_iter += 1
            batch_reactions, batch_graph_edits, batch_mol_graphs, \
                batch_complete_graphs, batch_atom_pair_labels = batch_data
            labels = batch_atom_pair_labels.to(args['device'])
            pred, biased_pred = reaction_center_prediction(
                args['device'], model, batch_mol_graphs, batch_complete_graphs)
            loss = criterion(pred, labels) / len(batch_reactions)
            loss_sum += loss.cpu().detach().data.item()
            grad_norm_sum += optimizer.backward_and_step(loss)

            if rank_iter % args['print_every'] == 0 and rank == 0:
                progress = 'Epoch {:d}/{:d}, iter {:d}/{:d} | ' \
                           'loss {:.4f} | grad norm {:.4f}'.format(
                    epoch + 1, args['num_epochs'], batch_id + 1, len(train_loader),
                    loss_sum / args['print_every'],
                    grad_norm_sum / args['print_every'])
                print(progress)
                grad_norm_sum = 0
                loss_sum = 0

            if total_iter % args['decay_every'] == 0:
                optimizer.decay_lr(args['lr_decay_factor'])
            if total_iter % args['decay_every'] == 0 and rank == 0:
                if epoch >= 1:
                    dur.append(time.time() - t0)
                    print('Training time per {:d} iterations: {:.4f}'.format(
                        rank_iter, np.mean(dur)))
                total_samples = total_iter * args['batch_size']
                prediction_summary = \
                    'total samples {:d}, (epoch {:d}/{:d}, iter {:d}/{:d}) '.format(
                        total_samples, epoch + 1, args['num_epochs'],
                        batch_id + 1, len(train_loader)) + \
                    reaction_center_final_eval(args, args['top_ks_val'], model,
                                               val_loader, easy=True)
                print(prediction_summary)
                with open(args['result_path'] + '/val_eval.txt', 'a') as f:
                    f.write(prediction_summary)
                torch.save({'model_state_dict': model.state_dict()},
                           args['result_path'] + '/model_{:d}.pkl'.format(total_samples))
                t0 = time.time()
                model.train()

        synchronize(args['num_devices'])
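Both multiprocess training loops above call synchronize(num_processes) or synchronize(num_devices) at epoch boundaries. A plausible implementation is a thin wrapper over a distributed barrier; shown here as an assumption, since the real utils module is not included.

# Hedged sketch of the synchronize() helper assumed by the training loops above.
import torch.distributed as dist


def synchronize(num_processes):
    """Block until every process reaches this point; no-op for a single process."""
    if num_processes > 1:
        dist.barrier()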
class Recognizer: def __init__(self, hparams, trainable): self.trainable = trainable self.hparams = hparams self.image_shape = [224, 224, 3] self.license_number_list = hparams.license_number_list self.is_train = tf.placeholder_with_default(False, shape=[], name='is_train') self.layers = NeuralLayers(trainable=self.trainable, is_train=self.is_train, hparams=self.hparams) self.optimizer_builder = Optimizer(hparams=hparams) self.saver = None self.build_resnet50() if trainable: self.build_optimizer() self.build_metrics() self.build_summary() def build_resnet50(self): hparams = self.hparams images = tf.placeholder(dtype=tf.float32, shape=[None] + self.image_shape) conv1_feats = self.layers.conv2d(images, filters=64, kernel_size=(7, 7), strides=(2, 2), activation=None, name='conv1') conv1_feats = self.layers.batch_norm(conv1_feats, 'bn_conv1') conv1_feats = tf.nn.relu(conv1_feats) pool1_feats = self.layers.max_pool2d(conv1_feats, pool_size=(3, 3), strides=(2, 2), name='pool1') res2a_feats = self.identity_block_with_output_reduced( pool1_feats, 'res2a', 'bn2a', 64, (1, 1)) res2b_feats = self.identity_block(res2a_feats, 'res2b', 'bn2b', 64) res2c_feats = self.identity_block(res2b_feats, 'res2c', 'bn2c', 64) res3a_feats = self.identity_block_with_output_reduced( res2c_feats, 'res3a', 'bn3a', 128) res3b_feats = self.identity_block(res3a_feats, 'res3b', 'bn3b', 128) res3c_feats = self.identity_block(res3b_feats, 'res3c', 'bn3c', 128) res3d_feats = self.identity_block(res3c_feats, 'res3d', 'bn3d', 128) res4a_feats = self.identity_block_with_output_reduced( res3d_feats, 'res4a', 'bn4a', 256) res4b_feats = self.identity_block(res4a_feats, 'res4b', 'bn4b', 256) res4c_feats = self.identity_block(res4b_feats, 'res4c', 'bn4c', 256) res4d_feats = self.identity_block(res4c_feats, 'res4d', 'bn4d', 256) res4e_feats = self.identity_block(res4d_feats, 'res4e', 'bn4e', 256) res4f_feats = self.identity_block(res4e_feats, 'res4f', 'bn4f', 256) res5a_feats = self.identity_block_with_output_reduced( res4f_feats, 'res5a', 'bn5a', 512) res5b_feats = self.identity_block(res5a_feats, 'res5b', 'bn5b', 512) res5c_feats = self.identity_block(res5b_feats, 'res5c', 'bn5c', 512) global_avg_pool = self.layers.global_avg_pool2d(res5c_feats, keepdims=False, name='global_avg_pool') global_avg_pool = self.layers.dropout(global_avg_pool, name='global_avg_pool_dropout') logits = [] probabilities = [] predictions = [] for i, num_list in enumerate(self.license_number_list): logit = self.layers.dense(global_avg_pool, units=len(num_list), activation=None, name='num_{}'.format(i)) probability = tf.nn.softmax(logit) prediction = tf.argmax(probability, axis=1) logits.append(logit) probabilities.append(probability) predictions.append(prediction) self.images = images self.logits = logits self.probabilities = probabilities self.predictions = predictions def identity_block_with_output_reduced(self, inputs, name1, name2, filters, strides=(2, 2)): """ A basic block of ResNet. 
""" branch1_feats = self.layers.conv2d(inputs, filters=4 * filters, kernel_size=(1, 1), strides=strides, activation=None, use_bias=False, name=name1 + '_branch1') branch1_feats = self.layers.batch_norm(branch1_feats, name2 + '_branch1') branch2a_feats = self.layers.conv2d(inputs, filters=filters, kernel_size=(1, 1), strides=(1, 1), activation=None, use_bias=False, name=name1 + '_branch2a') branch2a_feats = self.layers.batch_norm(branch2a_feats, name2 + '_branch2a') branch2a_feats = tf.nn.relu(branch2a_feats) branch2b_feats = self.layers.conv2d(branch2a_feats, filters=filters, kernel_size=(3, 3), strides=strides, activation=None, use_bias=False, name=name1 + '_branch2b') branch2b_feats = self.layers.batch_norm(branch2b_feats, name2 + '_branch2b') branch2b_feats = tf.nn.relu(branch2b_feats) branch2c_feats = self.layers.conv2d(branch2b_feats, filters=4 * filters, kernel_size=(1, 1), strides=(1, 1), activation=None, use_bias=False, name=name1 + '_branch2c') branch2c_feats = self.layers.batch_norm(branch2c_feats, name2 + '_branch2c') outputs = branch1_feats + branch2c_feats outputs = tf.nn.relu(outputs) return outputs def identity_block(self, inputs, name1, name2, filters): """ Another basic block of ResNet. """ branch2a_feats = self.layers.conv2d(inputs, filters=filters, kernel_size=(1, 1), strides=(1, 1), activation=None, use_bias=False, name=name1 + '_branch2a') branch2a_feats = self.layers.batch_norm(branch2a_feats, name2 + '_branch2a') branch2a_feats = tf.nn.relu(branch2a_feats) branch2b_feats = self.layers.conv2d(branch2a_feats, filters=filters, kernel_size=(3, 3), strides=(1, 1), activation=None, use_bias=False, name=name1 + '_branch2b') branch2b_feats = self.layers.batch_norm(branch2b_feats, name2 + '_branch2b') branch2b_feats = tf.nn.relu(branch2b_feats) branch2c_feats = self.layers.conv2d(branch2b_feats, filters=4 * filters, kernel_size=(1, 1), strides=(1, 1), activation=None, use_bias=False, name=name1 + '_branch2c') branch2c_feats = self.layers.batch_norm(branch2c_feats, name2 + '_branch2c') outputs = inputs + branch2c_feats outputs = tf.nn.relu(outputs) return outputs def identity_block_without_bottleneck(self, inputs, name1, name2, filters): """ Another basic block of ResNet. 
""" branch2a_feats = self.layers.conv2d(inputs, filters=filters, kernel_size=(3, 3), strides=(1, 1), activation=None, use_bias=False, name=name1 + '_branch2a') branch2a_feats = self.layers.batch_norm(branch2a_feats, name2 + '_branch2a') branch2a_feats = tf.nn.relu(branch2a_feats) branch2b_feats = self.layers.conv2d(branch2a_feats, filters=filters, kernel_size=(3, 3), strides=(1, 1), activation=None, use_bias=False, name=name1 + '_branch2b') branch2b_feats = self.layers.batch_norm(branch2b_feats, name2 + '_branch2b') outputs = inputs + branch2b_feats outputs = tf.nn.relu(outputs) return outputs def se_block(self, inputs, filters, name, ratio=16): avgpool = self.layers.global_avg_pool2d(inputs=inputs, keepdims=False, name=name + '_avgpool') dense1 = self.layers.dense(inputs=avgpool, units=filters / ratio, activation=tf.nn.relu, name=name + '_dense') weighted = self.layers.dense(inputs=dense1, units=filters, activation=tf.nn.sigmoid, name=name + '_weighted') weighted = tf.reshape(weighted, (-1, 1, 1, filters)) outputs = tf.multiply(inputs, weighted) return outputs def build_optimizer(self): hparams = self.hparams global_step = tf.train.get_or_create_global_step() labels = tf.placeholder(dtype=tf.int64, shape=[None, len(self.license_number_list)]) num_losses = [] min_len = np.min([len(n) for n in self.license_number_list]) losses = [] for i, num_list in enumerate(self.license_number_list): loss = tf.losses.sparse_softmax_cross_entropy( labels=labels[:, i], logits=self.logits[i]) num_losses.append(loss) weight = len(num_list) / min_len loss = weight * loss losses.append(loss) cross_entropy_loss = tf.add_n(losses) regularization_loss = tf.losses.get_regularization_loss() total_loss = cross_entropy_loss + regularization_loss learning_rate = self.optimizer_builder.compute_learning_rate( global_step) optimizer = self.optimizer_builder.build(name=hparams.optimizer, learning_rate=learning_rate) gradients, variables = zip(*optimizer.compute_gradients(total_loss, )) gradients, _ = tf.clip_by_global_norm(gradients, hparams.clip_gradients) update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) train_op = optimizer.apply_gradients(zip(gradients, variables), global_step=global_step) train_op = tf.group([train_op, update_ops]) self.global_step = global_step self.labels = labels self.num_losses = num_losses self.cross_entropy_loss = cross_entropy_loss self.regularization_loss = regularization_loss self.total_loss = total_loss self.learning_rate = learning_rate self.optimizer = optimizer self.train_op = train_op def build_metrics(self): avg_cross_entropy_loss, avg_cross_entropy_loss_op = tf.metrics.mean_tensor( self.cross_entropy_loss) avg_reg_loss, avg_reg_loss_op = tf.metrics.mean_tensor( self.regularization_loss) avg_total_loss, avg_total_loss_op = tf.metrics.mean_tensor( self.total_loss) predictions = tf.stack(self.predictions, axis=1) partial_accuracy, partial_accuracy_op = tf.metrics.accuracy( labels=self.labels, predictions=predictions) matches = tf.reduce_all(tf.equal(self.labels, predictions), axis=1) accuracy, accuracy_op = tf.metrics.accuracy( labels=tf.ones_like(matches), predictions=matches) self.metrics = { 'cross_entropy_loss': avg_cross_entropy_loss, 'regularization_loss': avg_reg_loss, 'total_loss': avg_total_loss, 'partial_accuracy': partial_accuracy, 'accuracy': accuracy } self.metric_ops = { 'cross_entropy_loss': avg_cross_entropy_loss_op, 'regularization_loss': avg_reg_loss_op, 'total_loss': avg_total_loss_op, 'partial_accuracy': partial_accuracy_op, 'accuracy': accuracy_op } for i, 
num_list in enumerate(self.license_number_list): loss, loss_op = tf.metrics.mean_tensor(self.num_losses[i]) accuracy, accuracy_op = tf.metrics.accuracy( labels=self.labels[:, i], predictions=self.predictions[i]) self.metrics.update({ 'num{}_loss'.format(i): loss, 'num{}_accuracy'.format(i): accuracy }) self.metric_ops.update({ 'num{}_loss'.format(i): loss_op, 'num{}_accuracy'.format(i): accuracy_op }) self.metric_vars = tf.get_collection(tf.GraphKeys.METRIC_VARIABLES) self.reset_metric_op = tf.variables_initializer(self.metric_vars) def build_summary(self): with tf.name_scope('metric'): for metric_name, metric_tensor in self.metrics.items(): tf.summary.scalar(metric_name, metric_tensor) with tf.name_scope('hyperparam'): tf.summary.scalar('learning_rate', self.learning_rate) self.summary = tf.summary.merge_all() def cache_metric_values(self, sess): metric_values = sess.run(self.metric_vars) self.metric_values = metric_values def restore_metric_values(self, sess): for var, value in zip(self.metric_vars, self.metric_values): sess.run(var.assign(value)) def encode_labels(self, labels): encoded_labels = [] for label in labels: mapped_label = [] for i, num in enumerate(label): assert len(label) == len(self.license_number_list) idx = self.license_number_list[i].index(num) mapped_label.append(idx) encoded_labels.append(mapped_label) encoded_labels = np.array(encoded_labels) return encoded_labels def decode_predictions(self, predictions): predictions = np.column_stack(predictions) decoded_predictions = [] for prediction in predictions: decoded_prediction = [] for i, num_idx in enumerate(prediction): decoded_prediction.append(self.license_number_list[i][num_idx]) decoded_prediction = ''.join(decoded_prediction) decoded_predictions.append(decoded_prediction) return decoded_predictions def train(self, sess, train_dataset, val_dataset, test_dataset=None, load_checkpoint=False, checkpoint=None): hparams = self.hparams if not os.path.exists(hparams.summary_dir): os.mkdir(hparams.summary_dir) train_writer = tf.summary.FileWriter(hparams.summary_dir + '/train', sess.graph) val_writer = tf.summary.FileWriter(hparams.summary_dir + '/val') if test_dataset is not None: test_writer = tf.summary.FileWriter(hparams.summary_dir + '/test') train_fetches = { 'train_op': self.train_op, 'global_step': self.global_step } train_fetches.update(self.metric_ops) val_fetches = self.metric_ops if test_dataset is not None: test_fetches = self.metric_ops sess.run(tf.global_variables_initializer()) sess.run(tf.local_variables_initializer()) if load_checkpoint: self.load(sess, checkpoint) # Training for _ in tqdm(range(self.hparams.num_epochs), desc='epoch'): for _ in tqdm(range(train_dataset.num_batches), desc='batch', leave=False): images, labels = train_dataset.next_batch() labels = self.encode_labels(labels) feed_dict = { self.images: images, self.labels: labels, self.is_train: True } train_record = sess.run(train_fetches, feed_dict=feed_dict) tqdm.write( "Train step {}: total loss: {:>10.5f} partial accuracy: {:8.2f} accuracy: {:8.2f}" .format(train_record['global_step'], train_record['total_loss'], train_record['partial_accuracy'] * 100, train_record['accuracy'] * 100)) if train_record['global_step'] % hparams.summary_period == 0: summary = sess.run(self.summary) train_writer.add_summary(summary, train_record['global_step']) # Validation if (train_record['global_step'] + 1) % hparams.eval_period == 0: self.cache_metric_values(sess) sess.run(self.reset_metric_op) for _ in tqdm(range(val_dataset.num_batches), desc='val', 
leave=False): images, labels = val_dataset.next_batch() labels = self.encode_labels(labels) feed_dict = {self.images: images, self.labels: labels} val_record = sess.run(val_fetches, feed_dict=feed_dict) tqdm.write( "Validation step {}: total loss: {:>10.5f} partial accuracy: {:8.2f} accuracy: {:8.2f}" .format(train_record['global_step'], val_record['total_loss'], val_record['partial_accuracy'] * 100, val_record['accuracy'] * 100)) summary = sess.run(self.summary) val_writer.add_summary(summary, train_record['global_step']) val_writer.flush() val_dataset.reset() self.restore_metric_values(sess) sess.run(self.reset_metric_op) self.save(sess, global_step=train_record['global_step']) train_dataset.reset() train_writer.close() val_writer.close() # Testing if test_dataset is not None: sess.run(self.reset_metric_op) for _ in tqdm(range(test_dataset.num_batches), desc='testing', leave=False): images, labels = val_dataset.next_batch() labels = self.encode_labels(labels) feed_dict = {self.images: images, self.labels: labels} test_record = sess.run(test_fetches, feed_dict=feed_dict) tqdm.write( "Testing: total loss: {:>10.5f} partial accuracy: {:8.2f} accuracy: {:8.2f}" .format(test_record['total_loss'], test_record['partial_accuracy'] * 100, test_record['accuracy'] * 100)) summary = sess.run(self.summary) test_writer.add_summary(summary, train_record['global_step']) test_writer.flush() test_writer.close() def eval(self, sess, test_dataset, checkpoint=None): hparams = self.hparams result = {'image': [], 'ground truth': [], 'prediction': []} sess.run(tf.global_variables_initializer()) sess.run(tf.local_variables_initializer()) self.load(sess, checkpoint) # Testing for _ in tqdm(range(test_dataset.num_batches), desc='batch', leave=False): images, labels = test_dataset.next_batch() encoded_labels = self.encode_labels(labels) predictions, _ = sess.run([self.predictions, self.metric_ops], feed_dict={ self.images: images, self.labels: encoded_labels }) predictions = self.decode_predictions(predictions) for image, file, label, prediction in zip( images, test_dataset.current_image_files, labels, predictions): result['image'].append(file) result['ground truth'].append(label) result['prediction'].append(prediction) plt.imshow(image) plt.title(prediction) plt.savefig('{}/{}'.format(hparams.test_result_dir, file)) plt.close() result = pd.DataFrame.from_dict(result) result.to_csv('result.txt') eval_result = sess.run(self.metrics) with open('eval.txt', 'w') as f: for name, value in eval_result.items(): print('{}: {}'.format(name, value)) print('{}: {}'.format(name, value), file=f, end='\n') def test(self, sess, test_dataset, checkpoint=None): hparams = self.hparams result = {'image': [], 'prediction': []} sess.run(tf.global_variables_initializer()) sess.run(tf.local_variables_initializer()) self.load(sess, checkpoint) # Testing for _ in tqdm(range(test_dataset.num_batches), desc='batch', leave=False): images = test_dataset.next_batch() predictions = sess.run(self.predictions, feed_dict={self.images: images}) predictions = self.decode_predictions(predictions) for image, file, prediction in zip( images, test_dataset.current_image_files, predictions): result['image'].append(file) result['prediction'].append(prediction) plt.imshow(image) plt.title(prediction) plt.savefig('{}/{}'.format(hparams.test_result_dir, file)) plt.close() result = pd.DataFrame.from_dict(result) result.to_csv('result.txt') def save(self, sess, save_dir=None, global_step=None): if self.saver is None: self.saver = tf.train.Saver() save_dir = 
save_dir or self.hparams.save_dir global_step = global_step or self.global_step.eval(session=sess) self.saver.save(sess, save_dir + '/recognizer-model.ckpt', global_step=global_step) def load(self, sess, checkpoint=None): if self.saver is None: self.saver = tf.train.Saver() if checkpoint is None: checkpoint = tf.train.latest_checkpoint(self.hparams.save_dir) if checkpoint is None: return self.saver.restore(sess, checkpoint)
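The Recognizer above is TF1-style graph code (placeholders, tf.train.Saver, explicit metric ops), so it is driven through an explicit session. Below is a hedged usage sketch; the hparams object and the dataset helpers (next_batch(), num_batches, reset()) are assumed from how train() consumes them and are not defined in the snippet.

# Hedged usage sketch for the TF1-style Recognizer class above.
import tensorflow as tf


def run_training(hparams, train_dataset, val_dataset):
    # Build the graph in training mode, then create a session and train.
    model = Recognizer(hparams, trainable=True)
    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        model.train(sess, train_dataset, val_dataset,
                    load_checkpoint=False, checkpoint=None)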
def main():
    logging = get_root_logger(args.log_path, mode='a')
    logging.info('Command Line Arguments:')
    for key, i in vars(args).items():
        logging.info(key + ' = ' + str(i))
    logging.info('End Command Line Arguments')

    batch_size = args.batch_size
    num_epochs = args.num_epochs
    resume_from = args.resume_from
    steps_per_checkpoint = args.steps_per_checkpoint
    gpu_id = args.gpu_id

    configure_process(args, gpu_id)
    if gpu_id > -1:
        logging.info('Using CUDA on GPU ' + str(gpu_id))
        args.cuda = True
    else:
        logging.info('Using CPU')
        args.cuda = False

    # Load data
    logging.info('Data base dir ' + args.data_base_dir)
    logging.info('Loading vocab from ' + args.vocab_file)
    with open(args.vocab_file, "r", encoding='utf-8') as f:
        args.target_vocab_size = len(f.readlines()) + 4

    logging.info('Load training data from ' + args.data_path)
    train_data = UIDataset(args.data_base_dir, args.data_path, args.label_path,
                           args.vocab_file)
    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True,
                              num_workers=2, drop_last=True, collate_fn=collate_fn)

    logging.info('Load validation data from ' + args.val_data_path)
    val_data = UIDataset(args.data_base_dir, args.val_data_path, args.label_path,
                         args.vocab_file)
    val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=True,
                            num_workers=2, drop_last=True, collate_fn=collate_fn)

    # Build model
    logging.info('Building model')
    if args.resume_from:
        logging.info('Loading checkpoint from %s' % resume_from)
        checkpoint = torch.load(resume_from)
    else:
        checkpoint = None
        logging.info('Creating model with fresh parameters')

    model = build_model(args, gpu_id, checkpoint)
    logging.info(model)

    n_params, enc, dec = cal_parameters(model)
    logging.info('encoder: %d' % enc)
    logging.info('decoder: %d' % dec)
    logging.info('number of parameters: %d' % n_params)

    # Build optimizer
    optimizer = torch.optim.SGD(model.parameters(), lr=args.learning_rate)
    optim = Optimizer(optimizer)
    if checkpoint:
        optim.load_state_dict(checkpoint['optim'])
        optim.training_step += 1

    # Build model saver
    model_saver = ModelSaver(args.model_dir, model, optim)

    train(model, optim, model_saver, num_epochs, train_loader, val_loader,
          steps_per_checkpoint, args.valid_steps, args.lr_decay,
          args.start_decay_at, args.cuda)
cg_train_set = NYU_Depth_V2_v2('train', loadSize, fineSize)
print('Loaded training set')
cg_val_set = NYU_Depth_V2_v2('val', loadSize, fineSize)
print('Loaded val set')

# train_set / val_set are expected to be defined earlier in the script.
dataset = {0: train_set, 1: val_set}
if len(sys.argv) == 3:
    cg_dataset = {0: cg_train_set, 1: cg_val_set}
    p2p_dataset = {0: cg_train_set, 1: cg_val_set}
else:
    cg_dataset = {0: train_set, 1: val_set}
    p2p_dataset = {0: train_set, 1: val_set}

opt = Optimizer(lr=1e-4, beta1=0.5, lambda_L1=0.01, n_epochs=100, batch_size=4)
p2p_opt = p2pOptimizer(input_nc=3, output_nc=3, num_downs=8, ngf=64,
                       norm_layer=nn.BatchNorm2d, use_dropout=True, ndf=64,
                       n_layers_D=3, lr=0.0002, beta1=0.5, lambda_L1=5,
                       n_blocks=9, padding_type='reflect')