config.n_cells = config.n_layers
# double the number of cells for bidirectional networks
if config.birnn:
    config.n_cells *= 2

if args.resume_snapshot:
    model = torch.load(args.resume_snapshot, map_location=device)
else:
    model = SNLIClassifier(config)
    if args.word_vectors:
        model.embed.weight.data.copy_(inputs.vocab.vectors)
model.to(device)

criterion = nn.CrossEntropyLoss()
opt = O.Adam(model.parameters(), lr=args.lr)

iterations = 0
start = time.time()
best_dev_acc = -1
train_iter.repeat = False
header = ' Time Epoch Iteration Progress (%Epoch) Loss Dev/Loss Accuracy Dev/Accuracy'
dev_log_template = ' '.join(
    '{:>6.0f},{:>5.0f},{:>9.0f},{:>5.0f}/{:<5.0f} {:>7.0f}%,{:>8.6f},{:8.6f},{:12.4f},{:12.4f}'
    .split(','))
log_template = ' '.join(
    '{:>6.0f},{:>5.0f},{:>9.0f},{:>5.0f}/{:<5.0f} {:>7.0f}%,{:>8.6f},{},{:12.4f},{}'
    .split(','))
makedirs(args.save_path)
print(header)
config.n_embed = len(inputs.vocab)
config.d_out = len(answers.vocab)
config.n_cells = config.n_layers
# double the number of cells for bidirectional networks
if config.birnn:
    config.n_cells *= 2

if args.resume_snapshot:
    model = torch.load(args.resume_snapshot,
                       map_location=lambda storage, location: storage.cuda(args.gpu))
else:
    model = SNLIClassifier(config)
    if args.word_vectors:
        model.embed.weight.data = inputs.vocab.vectors
    model.cuda()

criterion = nn.CrossEntropyLoss()
opt = O.Adam(model.parameters(), lr=args.lr)

iterations = 0
start = time.time()
best_dev_acc = -1
train_iter.repeat = False
header = ' Time Epoch Iteration Progress (%Epoch) Loss Dev/Loss Accuracy Dev/Accuracy'
dev_log_template = ' '.join(
    '{:>6.0f},{:>5.0f},{:>9.0f},{:>5.0f}/{:<5.0f} {:>7.0f}%,{:>8.6f},{:8.6f},{:12.4f},{:12.4f}'
    .split(','))
log_template = ' '.join(
    '{:>6.0f},{:>5.0f},{:>9.0f},{:>5.0f}/{:<5.0f} {:>7.0f}%,{:>8.6f},{},{:12.4f},{}'
    .split(','))
os.makedirs(args.save_path, exist_ok=True)
print(header)

for epoch in range(args.epochs):
    train_iter.init_epoch()
    n_correct, n_total = 0, 0
    for batch_idx, batch in enumerate(train_iter):
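        # A minimal sketch of how this loop body might continue, following the
        # usual SNLI training recipe implied by the setup above. It assumes the
        # model returns class logits aligned with batch.label and that an
        # args.log_every argument exists; both are assumptions, not part of
        # this excerpt.
        iterations += 1
        model.train()
        opt.zero_grad()
        answer = model(batch)

        # running training accuracy
        n_correct += (torch.max(answer, 1)[1]
                      .view(batch.label.size()) == batch.label).sum().item()
        n_total += batch.batch_size
        train_acc = 100. * n_correct / n_total

        loss = criterion(answer, batch.label)
        loss.backward()
        opt.step()

        # periodic progress line, formatted with the log_template defined above
        if iterations % args.log_every == 0:
            print(log_template.format(
                time.time() - start, epoch, iterations,
                1 + batch_idx, len(train_iter),
                100. * (1 + batch_idx) / len(train_iter),
                loss.item(), ' ' * 8, train_acc, ' ' * 12))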
else:
    config.regularization = 0

model = SNLIClassifier(config)
if config.spinn:
    # small uniform init for the final layer of the output MLP
    model.out[len(model.out._modules) - 1].weight.data.uniform_(-0.005, 0.005)
if args.word_vectors:
    model.embed.weight.data = inputs.vocab.vectors
if args.gpu != -1:
    model.cuda()
if args.resume_snapshot:
    model.load_state_dict(torch.load(args.resume_snapshot))

criterion = nn.CrossEntropyLoss()
# opt = optim.Adam(model.parameters(), lr=args.lr)
opt = optim.RMSprop(model.parameters(), lr=config.lr, alpha=0.9, eps=1e-6,
                    weight_decay=config.regularization)

iterations = 0
start = time.time()
best_dev_acc = -1
train_iter.repeat = False
header = ' Time Epoch Iteration Progress (%Epoch) Loss Dev/Loss Accuracy Dev/Accuracy'
dev_log_template = ' '.join(
    '{:>6.0f},{:>5.0f},{:>9.0f},{:>5.0f}/{:<5.0f} {:>7.0f}%,{:>8.6f},{:8.6f},{:12.4f},{:12.4f}'
    .split(','))
log_template = ' '.join(
    '{:>6.0f},{:>5.0f},{:>9.0f},{:>5.0f}/{:<5.0f} {:>7.0f}%,{:>8.6f},{},{:12.4f},{}'
    .split(','))
    config.d_mlp = 1024
    config.d_hidden = 300
    config.d_embed = 300
    config.d_proj = 600
    torch.backends.cudnn.enabled = False
else:
    config.regularization = 0

model = SNLIClassifier(config)
if config.spinn:
    # small uniform init for the final layer of the output MLP
    model.out[len(model.out._modules) - 1].weight.data.uniform_(-0.005, 0.005)
if args.word_vectors:
    model.embed.weight.data = inputs.vocab.vectors
if args.gpu != -1:
    model.cuda()
# sanity check that the parameters actually moved to the GPU
print(next(model.parameters()).is_cuda)
if args.resume_snapshot:
    model.load_state_dict(torch.load(args.resume_snapshot))

criterion = nn.CrossEntropyLoss()
# opt = optim.Adam(model.parameters(), lr=args.lr)
opt = optim.RMSprop(model.parameters(), lr=config.lr, alpha=0.9, eps=1e-6,
                    weight_decay=config.regularization)

iterations = 0
start = time.time()
best_dev_acc = -1
train_iter.repeat = False
# hyperparameters passed in from the surrounding scope (e.g. a sweep)
config.mlp_dropout = dropout
config.embed_dropout = dropout
config.d_tracker = tracker_dim
config.birnn = birnn
config.d_mlp = mlp_dimension
config.predict = predict_transitions
config.n_mlp_layers = n_mlp_layers
if birnn:
    config.n_cells *= 2

model = SNLIClassifier(config)
model.embed.weight.data = inputs.vocab.vectors
model.to(device)

criterion = nn.CrossEntropyLoss()
opt = optim.Adam(model.parameters(), lr=lr)

iterations = 0
start = time.time()
best_dev_acc = -1
train_iter.repeat = False
header = ' Time Epoch Iteration Progress (%Epoch) Loss Dev/Loss Accuracy Dev/Accuracy'
dev_log_template = ' '.join(
    '{:>6.0f},{:>5.0f},{:>9.0f},{:>5.0f}/{:<5.0f} {:>7.0f}%,{:>8.6f},{:8.6f},{:12.4f},{:12.4f}'
    .split(','))
log_template = ' '.join(
    '{:>6.0f},{:>5.0f},{:>9.0f},{:>5.0f}/{:<5.0f} {:>7.0f}%,{:>8.6f},{},{:12.4f},{}'
    .split(','))
print(header)
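# Sketch of how best_dev_acc and dev_log_template defined above are typically
# consumed at evaluation checkpoints inside the training loop that follows this
# setup; epoch, batch_idx, loss and train_acc come from that loop. dev_iter,
# args.dev_every, args.save_path and the snapshot file name are illustrative
# assumptions, not part of this excerpt.
if iterations % args.dev_every == 0:
    model.eval()
    dev_iter.init_epoch()
    n_dev_correct, n_dev_total, dev_loss = 0, 0, 0.0
    with torch.no_grad():
        for dev_batch in dev_iter:
            answer = model(dev_batch)
            n_dev_correct += (torch.max(answer, 1)[1]
                              .view(dev_batch.label.size()) == dev_batch.label).sum().item()
            n_dev_total += dev_batch.batch_size
            dev_loss = criterion(answer, dev_batch.label).item()
    dev_acc = 100. * n_dev_correct / n_dev_total
    print(dev_log_template.format(
        time.time() - start, epoch, iterations,
        1 + batch_idx, len(train_iter),
        100. * (1 + batch_idx) / len(train_iter),
        loss.item(), dev_loss, train_acc, dev_acc))
    # keep only the best-performing snapshot
    if dev_acc > best_dev_acc:
        best_dev_acc = dev_acc
        torch.save(model.state_dict(),
                   os.path.join(args.save_path, 'best_snapshot.pt'))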