def main(opt):
    """Train a LeNet classifier on the source domain, then visualize
    Grad-CAM maps on samples drawn from the transfer loader.

    Args:
        opt: parsed CLI options; uses root, src, tgt, epoch, lr, reg,
            beta1, beta2.
    """
    global _TFS_DIC
    root_path = opt.root
    data_name = opt.src
    data_loader = get_data_loader(data_name, root_path, tfs=_TFS_DIC[data_name])
    data_loader_v2 = get_data_loader('svhn_transfer', root_path,
                                     tfs=_TFS_DIC[opt.tgt])
    network = LeNet()
    # epoch == 0 means "skip training": restore previously saved weights.
    if opt.epoch == 0:
        network.load_state_dict(torch.load('model_info/nn.pt'))
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(network.parameters(), lr=opt.lr,
                                 weight_decay=opt.reg,
                                 betas=(opt.beta1, opt.beta2))
    trainer = Trainer(network, data_loader, optimizer, criterion,
                      'model_info/nn.pt')
    trainer.train(opt.epoch)
    trainer.save_model()
    # trainer.test()
    # trainer.dl = data_loader_v2
    for i in range(100):
        # Idiom fix: len(ds) instead of ds.__len__().
        # NOTE(review): the index is drawn from the *source* dataset's size
        # but used to index the transfer dataset — confirm both datasets
        # have the same length, otherwise this can raise IndexError.
        idx = random.randint(0, len(data_loader.dataset) - 1)
        trainer.grad_cam(data_loader_v2.dataset[idx], idx)
def __init__(self, model_source, model_target, classify_model, source, target, args, graph):
    """Store the models, graph and settings, and build one evaluation
    (train=False) loader per domain."""
    self.args = args
    self.cuda = args.cuda
    self.graph = graph
    self.model_source = model_source
    self.model_target = model_target
    self.classify_model = classify_model
    # Test-split loaders for the source and target domains.
    self.test_loader_source = get_data_loader(args, False, source)
    self.test_loader_target = get_data_loader(args, False, target)
def main(opts):
    """Build the X/Y-domain dataloaders, prepare output dirs, and train."""
    # Train/test dataloaders for each image domain.
    dataloader_X, test_dataloader_X = get_data_loader(opts=opts, image_type=opts.X)
    dataloader_Y, test_dataloader_Y = get_data_loader(opts=opts, image_type=opts.Y)
    # Make sure checkpoint and sample directories exist.
    for directory in (opts.checkpoint_dir, opts.sample_dir):
        utils.create_dir(directory)
    # Start training
    training_loop(dataloader_X, dataloader_Y,
                  test_dataloader_X, test_dataloader_Y, opts)
def main(opts):
    """Loads the data, creates checkpoint and sample directories, and
    starts the training loop.
    """
    # Create dataloaders for images from the two domains X and Y
    dataloader_X = get_data_loader(opts.X, opts=opts)
    dataloader_Y = get_data_loader(opts.Y, opts=opts)
    print('dataloaders created')
    # Fixed typo in the log messages: "lengh" -> "length".
    print('length of dataloader X: ', len(dataloader_X))
    print('length of dataloader Y: ', len(dataloader_Y))
    # Create checkpoint and sample directories
    utils.create_dir(opts.checkpoint_dir)
    utils.create_dir(opts.sample_dir)
    # Start training
    training_loop(dataloader_X, dataloader_Y, opts)
def show_samples(loader='train'):
    """Plot 16 originals (rows 1-2) and their model reconstructions
    (rows 3-4) from the first batch of the given split.

    Args:
        loader: dataset split name forwarded to get_data_loader.
    """
    # TODO: ensure no transforms on these
    data_loader = get_data_loader(batch_size=16, set=loader, shuffle=False)
    plt.clf()
    # Fixed: plt.subplot('481') passed a string, which is rejected by
    # modern matplotlib; use the explicit (nrows, ncols, index) form.
    plt.subplot(4, 8, 1)
    for data, _ in data_loader:
        inputs = data
        outputs, _, _ = model(inputs)
        for i in range(16):
            output = np.transpose(outputs[i].detach().cpu().numpy(), [1, 2, 0])
            original = np.transpose(inputs[i].detach().cpu().numpy(), [1, 2, 0])
            plt.subplot(4, 8, i + 1)
            plt.axis('off')
            plt.imshow(original)
            plt.subplot(4, 8, 16 + i + 1)
            plt.axis('off')
            plt.imshow(output)
        # NOTE(review): `save`, `save_path` and `model` are module-level
        # globals — confirm they are defined before this is called.
        if save:
            plt.savefig(save_path + 'faces.png')
        plt.show()
        break
def __init__(self, config: Config):
    """Wire up the model, optimizer, data loaders, loss, precision meters
    and logging from *config*, then restore the configured checkpoint.
    """
    logging.basicConfig(format='%(levelname)s: [%(asctime)s] [%(name)s:%(lineno)d-%(funcName)20s()] %(message)s', level=logging.INFO, datefmt='%d/%m/%Y %I:%M:%S')
    self._config = config
    self._model = FoveaNet(num_classes=config.num_classes).to(self._config.device)
    self._optimizer = torch.optim.Adam(params=self._model.parameters(), lr=1e-4)
    self._logger = logging.getLogger(self.__class__.__name__)
    self._visualizer = SummaryWriter()
    # One loader per mode (train / eval).
    self._data_loader = {DataMode.train: get_data_loader(config, mode=DataMode.train), DataMode.eval: get_data_loader(config, mode=DataMode.eval)}
    self._loss = TotalLoss(config)
    # Both precision meters share the configured post-processing function.
    self._precision_meter = {
        DataMode.train: PrecisionMeter(postprocess_fn=self._config.post_processing_fn, config=config),
        DataMode.eval: PrecisionMeter(postprocess_fn=self._config.post_processing_fn, config=config)
    }
    self._load_checkpoint(os.path.join(self._config.path_to_checkpoints, self._config.checkpoint_name))
def train_model(config, model, mem_set, epochs, current_proto):
    """Fine-tune *model* on the memory set using prototype-based logits.

    Honors multi-GPU loss averaging and gradient accumulation from
    *config*; prints the mean accumulated loss per epoch and returns
    the trained model.
    """
    loader = get_data_loader(config, mem_set, batch_size=5)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), config['learning_rate'])
    accum_steps = config['gradient_accumulation_steps']
    device = config['device']
    model.train()
    for _ in range(epochs):
        model.set_memorized_prototypes(current_proto)
        running_loss = 0.0
        optim_steps = 0
        model.zero_grad()
        for batch_idx, (labels, neg_labels, sentences, lengths) in enumerate(tqdm(loader)):
            logits, rep = model(sentences, lengths)
            proto_logits = model.mem_forward(rep)
            loss = criterion(proto_logits, labels.to(device))
            if config['n_gpu'] > 1:
                loss = loss.mean()
            if accum_steps > 1:
                loss = loss / accum_steps
            loss.backward()
            running_loss += loss.item()
            # Step the optimizer only every `accum_steps` mini-batches.
            if (batch_idx + 1) % accum_steps == 0:
                torch.nn.utils.clip_grad_norm_(model.parameters(), config['max_grad_norm'])
                optimizer.step()
                optim_steps += 1
                model.zero_grad()
        print(running_loss / optim_steps)
    return model
def train_simple_model(config, model, train_set, epochs):
    """Supervised cross-entropy training over *train_set* with optional
    multi-GPU averaging and gradient accumulation; returns the model."""
    loader = get_data_loader(config, train_set)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), config['learning_rate'])
    accum_steps = config['gradient_accumulation_steps']
    model.train()
    for _ in range(epochs):
        running_loss = 0.0
        optim_steps = 0
        for batch_idx, (labels, neg_labels, sentences, lengths) in enumerate(tqdm(loader)):
            model.zero_grad()
            logits, _ = model(sentences, lengths)
            loss = criterion(logits, labels.to(config['device']))
            if config['n_gpu'] > 1:
                loss = loss.mean()
            if accum_steps > 1:
                loss = loss / accum_steps
            loss.backward()
            running_loss += loss.item()
            # Optimizer advances only on accumulation boundaries.
            if (batch_idx + 1) % accum_steps == 0:
                torch.nn.utils.clip_grad_norm_(model.parameters(), config['max_grad_norm'])
                optimizer.step()
                optim_steps += 1
        print(running_loss / optim_steps)
    return model
def select_data(proto_set, config, model, sample_set, seen_relations, total_relations, total_size):
    """Pick up to *total_size* memory instances from *sample_set*, balanced
    across the seen relations and ordered by closeness to each relation's
    feature centroid.

    Returns:
        list: the selected memory instances.
    """
    data_loader = get_data_loader(config, sample_set, False, False, 100)
    data_id = {}
    data_feature = {}
    data_sort_index = {}
    total = 0
    data_loader = tqdm(data_loader)
    # Group feature vectors (and their global sample indices) by label.
    for step, (labels, neg_labels, sentences, lengths) in enumerate(data_loader):
        features = model.get_feature(sentences, lengths)
        for index, feature in enumerate(features):
            label_index = labels[index].item()
            if label_index not in data_id:
                data_id[label_index] = []
                data_feature[label_index] = []
            data_feature[label_index].append(feature)
            data_id[label_index].append(total)
            total += 1
    # For each label, sort its sample indices by distance to the centroid.
    for i in data_id:
        features = data_feature[i]
        indexes = np.array(data_id[i])
        core = np.concatenate(features).mean(0, keepdims=True)
        distances = np.linalg.norm(features - core, ord=2, axis=-1, keepdims=False)
        data_sort_index[i] = indexes[np.argsort(distances)]
    seen_relation_list = [i for i in seen_relations if i in data_feature]
    mem_set = []
    selected = 0  # renamed from `sum`, which shadowed the builtin
    while selected < total_size:
        # `mi` = equal share to take from each relation this round.
        mi = total_size
        relation_lists = []
        for i in seen_relation_list:
            ins = len(data_sort_index[i])
            if ins > 0:
                relation_lists.append(i)
                if ins < mi:
                    mi = ins
        if not relation_lists:
            break
        # Take at least one, at most an equal share, from each relation.
        mi = max(min(mi, (total_size - selected) // len(relation_lists)), 1)
        for i in relation_lists:
            for j in range(mi):
                mem_set.append(sample_set[data_sort_index[i][j]])
                selected += 1
                if selected >= total_size:
                    break
            if selected >= total_size:
                break
            # Drop the consumed prefix for the next round.
            data_sort_index[i] = data_sort_index[i][mi:]
    return mem_set
def pretrain(cutoff=0.6):
    """Pre-train the discriminator until its epoch accuracy exceeds *cutoff*.

    Relies on module globals: generator, discriminator, criterion,
    D_optimizer, args, batch_size. Uses label smoothing (0.9 real /
    0.1 fake) and weight clipping.
    """
    D_acc = 0
    while D_acc < cutoff:
        train_loader = get_data_loader(batch_size=batch_size, set='train')
        D_acc = 0
        parity = 0
        for i, (batch, _) in enumerate(train_loader):
            batch = batch.to(args.device)
            ones_label = Variable(torch.ones(batch.shape[0], 1))
            zeros_label = Variable(torch.zeros(batch.shape[0], 1))
            rec_enc, mu, logvar = generator(batch)
            noisev = Variable(torch.randn(batch.shape[0], args.latent))
            rec_noise = generator.decode(noisev)
            ''' train discriminator '''
            # real photo
            output = discriminator(batch)
            DR_loss = criterion(output, ones_label * 0.9)
            D_acc += ((torch.round(output) == ones_label).sum().cpu().numpy() / torch.numel(ones_label))/2
            # Alternate each batch between reconstructed images and decoded
            # noise as the "fake" input for the discriminator.
            if parity:
                # reconstructed photo
                output = discriminator(rec_enc)
                DF_loss = criterion(output, ones_label * 0.1)
                D_acc += ((torch.round(output) == zeros_label).sum().cpu().numpy() / torch.numel(zeros_label))/2
            elif not parity:
                # Decoded noise
                output = discriminator(rec_noise)
                DF_loss = criterion(output, ones_label * 0.1)
                D_acc += ((torch.round(output) == zeros_label).sum().cpu().numpy() / torch.numel(zeros_label))/2
            D_l = DR_loss + DF_loss
            D_optimizer.zero_grad()
            D_l.backward()
            D_optimizer.step()
            # Clip weights of discriminator
            for p in discriminator.parameters():
                p.data.clamp_(-args.clip_value, args.clip_value)
            # switch train type next D batch
            parity = not parity
            # Early exit once the running mean accuracy clears the cutoff.
            if D_acc/(i+1) > cutoff:
                print('pretrain D_acc: %.3f' % (D_acc/(i+1)))
                return
        D_acc = D_acc/len(train_loader)
        print('pretrain D_acc: %.3f' % (D_acc))
def main(opt):
    """Export every SVHN training image as a PNG under
    data/svhn_png/training/<label>/<index>.png.
    """
    svhn_corpus = get_data_loader('svhn', opt.root, tfs=_TFS, batch_size=1, train_flag=True)
    for idx, (img, label) in enumerate(svhn_corpus):
        out_dir = 'data/svhn_png/training/%s/' % str(label.item())
        # makedirs(exist_ok=True) also creates missing parents and avoids
        # the exists()/mkdir() race of the original.
        os.makedirs(out_dir, exist_ok=True)
        save_image(img, '%s%s.png' % (out_dir, idx))
def select_data_twice(mem_set, proto_set, config, model, sample_set, num_sel_data, at_least=3):
    """Two-stage memory selection: global K-means decides how many slots
    each relation gets (at least *at_least* each), then a per-relation
    K-means picks that many representatives.

    Returns the (mutated) mem_set; chosen instances are also recorded in
    proto_set keyed by relation id.
    """
    data_loader = get_data_loader(config, sample_set, False, False)
    features = []
    for step, (_, _, sentences, lengths) in enumerate(data_loader):
        features.append(model.get_feature(sentences, lengths))
    features = np.concatenate(features)
    num_clusters = min(num_sel_data, len(sample_set))
    distances = KMeans(n_clusters=num_clusters, random_state=0).fit_transform(features)
    # rel_info: relation id -> sample indices; rel_alloc: relation id -> slots.
    rel_info = {}
    rel_alloc = {}
    # Fixed NameError: `emumerate` -> `enumerate`.
    for index, instance in enumerate(sample_set):
        if instance[0] not in rel_info:
            rel_info[instance[0]] = []
            rel_alloc[instance[0]] = 0
        rel_info[instance[0]].append(index)
    # One provisional slot per global cluster, credited to the relation of
    # the sample nearest that cluster's centre.
    for i in range(num_clusters):
        sel_index = np.argmin(distances[:, i])
        instance = sample_set[sel_index]
        rel_alloc[instance[0]] += 1
    # Fixed: entries must be lists, not tuples — the rebalancing loop below
    # mutates counts in place (tuples raised TypeError).
    rel_alloc = [[i, rel_alloc[i]] for i in rel_alloc]
    at_least = min(at_least, num_sel_data // len(rel_alloc))
    # Rebalance: move slots from the richest relations until every relation
    # has at least `at_least`.
    while True:
        rel_alloc = sorted(rel_alloc, key=lambda num: num[1], reverse=True)
        if rel_alloc[-1][1] >= at_least:
            break
        index = 0
        while rel_alloc[-1][1] < at_least:
            if rel_alloc[index][1] <= at_least:
                index = 0
            rel_alloc[-1][1] += 1
            rel_alloc[index][1] -= 1
            index += 1
    print(rel_alloc)
    # Fixed: the original appended a stale `instance` without using the
    # per-relation clustering result; now pick one representative per
    # per-relation cluster (capped by the relation's sample count).
    for label, num in rel_alloc:
        if num <= 0:
            continue
        indices = rel_info[label]
        tmp_feature = features[indices]
        local_k = min(num, len(indices))
        tmp_distances = KMeans(n_clusters=local_k, random_state=0).fit_transform(tmp_feature)
        for j in range(local_k):
            instance = sample_set[indices[np.argmin(tmp_distances[:, j])]]
            mem_set.append(instance)
            proto_set[instance[0]].append(instance)
    return mem_set
def convert2img(tran_net):
    """Dump SVHN training images to data/svhn_gray/training/<label>/<idx>.png.

    Temporarily switches *tran_net* to eval mode; the translation call is
    commented out, so the raw loader output is what gets saved.
    """
    tran_net.eval()
    # NOTE(review): relies on a module-level `opt` rather than a parameter —
    # confirm it is defined wherever this is called.
    svhn_corpus = get_data_loader('svhn', opt.root, tfs=_TFS_DIC['svhn'], batch_size=1, train_flag=True)
    for idx, (img, label, _) in enumerate(svhn_corpus):
        out_dir = 'data/svhn_gray/training/%s/' % str(label.item())
        # makedirs(exist_ok=True) creates missing parents and avoids the
        # exists()/mkdir() race of the original.
        os.makedirs(out_dir, exist_ok=True)
        img = Variable(img)
        if torch.cuda.is_available():
            img = img.cuda()
        # out = tran_net(img)['rec']
        save_image(img.data, '%s%s.png' % (out_dir, idx))
    tran_net.train()
def select_data_simple(mem_set, proto_set, config, model, sample_set, num_sel_data):
    """Append to *mem_set* one representative per K-means cluster of the
    sample features (K = min(num_sel_data, len(sample_set))); each chosen
    instance is also recorded in proto_set keyed by its relation id.

    Returns the (mutated) mem_set.
    """
    data_loader = get_data_loader(config, sample_set, False, False, 100)
    features = []
    # (removed a dead `labels = []` local that was immediately shadowed by
    # the loop variable below)
    for step, (labels, neg_labels, sentences, lengths) in enumerate(data_loader):
        features.append(model.get_feature(sentences, lengths))
    features = np.concatenate(features)
    num_clusters = min(num_sel_data, len(sample_set))
    distances = KMeans(n_clusters=num_clusters, random_state=0).fit_transform(features)
    for i in range(num_clusters):
        # Nearest sample to each cluster centre.
        sel_index = np.argmin(distances[:, i])
        instance = sample_set[sel_index]
        mem_set.append(instance)
        proto_set[instance[0]].append(instance)
    return mem_set
def main(config):
    """Prepare output directories, build the data loader, and run the
    solver in the configured mode.

    Returns:
        The Solver instance (trained or tested).
    """
    # Make sure the output directories exist.
    for path in (config.log_path, config.model_save_path):
        mkdir(path)
    loader = get_data_loader(data_path=config.data_path,
                             batch_size=config.batch_size,
                             mode=config.mode)
    solver = Solver(loader, vars(config))
    if config.mode == 'train':
        solver.train()
    elif config.mode == 'test':
        solver.test()
    return solver
def main(args):
    """Build the requested GAN variant and either train or evaluate it."""
    if args.model == 'DCGAN':
        model = DCGAN_MODEL(args)
    elif args.model == 'WGAN-GP':
        # Fixed duplicated `model = model =` assignment.
        model = WGAN_GP(args)
    else:
        # Fail fast instead of crashing later with `model is None`.
        raise ValueError('Unknown model type: %s' % args.model)
    # Load datasets to train and test loaders
    train_loader, test_loader = get_data_loader(args)
    #feature_extraction = FeatureExtractionTest(train_loader, test_loader, args.cuda, args.batch_size)
    # Start model training
    if args.is_train == 'True':
        model.train(train_loader, args)
    # start evaluating on test data
    else:
        model.evaluate(test_loader, args.load_D, args.load_G)
def get_memory(config, model, proto_set):
    """Compute one prototype vector per relation: the mean feature of all
    instances stored for that relation in *proto_set*.

    Returns:
        torch.Tensor of shape (len(proto_set), feature_dim).
    """
    memset = []
    rangeset = [0]  # slice boundaries into the flattened instance list
    # (removed unused `resset` local)
    for i in proto_set:
        memset += i
        rangeset.append(rangeset[-1] + len(i))
    data_loader = get_data_loader(config, memset, False, False, 100)
    features = []
    for step, (labels, neg_labels, sentences, lengths) in enumerate(data_loader):
        features.append(model.get_feature(sentences, lengths))
    features = np.concatenate(features)
    protos = []
    # Fixed log-message typo: "instaces" -> "instances".
    print("proto_instances:%d" % (len(features) - len(proto_set)))
    # Mean feature of each relation's slice is its prototype.
    for i in range(len(proto_set)):
        protos.append(torch.tensor(features[rangeset[i]:rangeset[i + 1], :].mean(0, keepdims=True)))
    return torch.cat(protos, 0)
def train_simple_model(config, model, train_set, epochs):
    """Train *model* on *train_set* with plain cross-entropy; prints the
    mean loss per epoch and returns the model."""
    loader = get_data_loader(config, train_set)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), config['learning_rate'])
    model.train()
    for _ in range(epochs):
        epoch_losses = []
        for labels, neg_labels, sentences, lengths in tqdm(loader):
            model.zero_grad()
            logits, _ = model(sentences, lengths)
            loss = criterion(logits, labels.to(config['device']))
            loss.backward()
            epoch_losses.append(loss.item())
            torch.nn.utils.clip_grad_norm_(model.parameters(), config['max_grad_norm'])
            optimizer.step()
        print(np.array(epoch_losses).mean())
    return model
def evaluate_model(config, model, test_set, num_class):
    """Accuracy over *test_set*: a prediction counts as correct when the
    combined (prototype + classifier) score of the gold label beats the
    score of every negative label."""
    model.eval()
    loader = get_data_loader(config, test_set, False, False)
    correct = 0
    total = 0.0
    for labels, neg_labels, sentences, lengths in loader:
        logits, rep = model(sentences, lengths)
        proto_scores = model.get_mem_feature(rep)
        logits = logits.cpu().data.numpy()
        for idx in range(len(logits)):
            score = proto_scores[idx] + logits[idx]
            total += 1.0
            gold_score = score[labels[idx]]
            # Highest-scoring negative label for this example.
            best_neg = -2147483647.0
            for neg in neg_labels[idx]:
                if neg != labels[idx] and score[neg] > best_neg:
                    best_neg = score[neg]
            if gold_score > best_neg:
                correct += 1
    return correct / total
def train_model(config, model, mem_set, epochs, current_proto):
    """Train on the memory set (batch size 5) with prototype-based logits;
    returns the trained model."""
    loader = get_data_loader(config, mem_set, batch_size=5)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), config['learning_rate'])
    model.train()
    for _ in range(epochs):
        # current_proto = get_memory(config, model, proto_memory)
        model.set_memorized_prototypes(current_proto)
        epoch_losses = []
        for labels, neg_labels, sentences, lengths in tqdm(loader):
            model.zero_grad()
            logits, rep = model(sentences, lengths)
            proto_logits = model.mem_forward(rep)
            loss = criterion(proto_logits, labels.to(config['device']))
            loss.backward()
            epoch_losses.append(loss.item())
            torch.nn.utils.clip_grad_norm_(model.parameters(), config['max_grad_norm'])
            optimizer.step()
        # Epoch losses are collected but not reported, as in the original.
    return model
def main():
    """Smoke-test the styled-caption pipeline: load the vocab, build the
    humorous-caption loader, run one decoder pass and print the loss."""
    with open("data/vocab.pkl", 'rb') as f:
        vocab = pickle.load(f)
    img_path = "data/flickr7k_images"
    cap_path = "data/factual_train.txt"
    styled_path = "data/humor/funny_train.txt"
    data_loader = get_data_loader(img_path, cap_path, vocab, 3)
    styled_data_loader = get_styled_data_loader(styled_path, vocab, 3)
    encoder = EncoderCNN(30)
    decoder = FactoredLSTM(30, 40, 40, len(vocab))
    if torch.cuda.is_available():
        encoder = encoder.cuda()
        decoder = decoder.cuda()
    # for i, (images, captions, lengths) in enumerate(data_loader):
    for i, (captions, lengths) in enumerate(styled_data_loader):
        # images = Variable(images, volatile=True)
        captions = Variable(captions.long())
        if torch.cuda.is_available():
            # images = images.cuda()
            captions = captions.cuda()
        # features = encoder(images)
        # Style branch only: no image features in "humorous" mode.
        outputs = decoder(captions, features=None, mode="humorous")
        print(lengths - 1)
        print(outputs)
        print(captions[:, 1:])
        # Targets are the captions shifted by one token (teacher forcing).
        loss = masked_cross_entropy(outputs, captions[:, 1:].contiguous(), lengths - 1)
        print(loss)
        break
def main(config, resume):
    """Assemble model, loss, metrics and loaders from *config*, then train
    (optionally resuming from a checkpoint)."""
    logger = Logger()
    loader = get_data_loader(config)
    # Validation split is carved out of the training loader.
    valid_loader = loader.split_validation()
    model = get_model_instance(model_arch=config['arch'], model_params=config['model'])
    model.summary()
    loss_fn = get_loss_function(config['loss'], **config['loss_args'])
    metric_fns = get_metric_functions(config['metrics'])
    trainer = Trainer(model, loss_fn, metric_fns,
                      resume=resume,
                      config=config,
                      data_loader=loader,
                      valid_data_loader=valid_loader,
                      train_logger=logger)
    trainer.train()
def main():
    """Sample humorous captions: load pretrained encoder/decoder weights
    and decode one message at a time from the factual training file."""
    # load vocabulary
    with open('data/vocab.pkl', 'rb') as f:
        vocab = pickle.load(f)
    # build model
    encoder = EncoderRNN(voc_size=60736, emb_size=300, hidden_size=300)
    decoder = FactoredLSTM(300, 512, 512, len(vocab))
    encoder.load_state_dict(torch.load('pretrained_models/encoder-4.pkl'))
    decoder.load_state_dict(torch.load('pretrained_models/decoder-4.pkl'))
    # prepare images
    # transform = transforms.Compose([
    #     Rescale((224, 224)),
    #     transforms.ToTensor()
    # ])
    # img_names, img_list = load_sample_images('sample_images/', transform)
    # image = to_var(img_list[30], volatile=True)
    data_loader = get_data_loader('', 'data/factual_train.txt', vocab, 1)
    # if torch.cuda.is_available():
    #     encoder = encoder.cuda()
    #     decoder = decoder.cuda()
    for i, (messages, m_lengths, targets, t_lengths) in enumerate(data_loader):
        # Print the raw input message, then its sampled humorous caption.
        print(''.join([vocab.i2w[x] for x in messages[0]]))
        messages = to_var(messages.long())
        targets = to_var(targets.long())
        # forward, backward and optimize
        output, features = encoder(messages, list(m_lengths))
        outputs = decoder.sample(features, mode="humorous")
        caption = [vocab.i2w[x] for x in outputs]
        print(''.join(caption))
        print('-------')
adversarial_loss = torch.nn.MSELoss() # Initialize generator and discriminator generator = Generator(opt) discriminator = Discriminator(opt) if cuda: generator.cuda() discriminator.cuda() adversarial_loss.cuda() # Initialize weights generator.apply(weights_init_normal) discriminator.apply(weights_init_normal) dataloader = get_data_loader(opt) # optimizer optimizer_G = torch.optim.Adam(generator.parameters(), lr=opt.lr) optimizer_D = torch.optim.Adam(discriminator.parameters(), lr=opt.lr) Tensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor # resume checkpoint if opt.resume_generator and opt.resume_discriminator: print('Resuming checkpoint from {} and {}'.format( opt.resume_generator, opt.resume_discriminator)) checkpoint_generator = torch.load(opt.resume_generator) checkpoint_discriminator = torch.load(opt.resume_discriminator) generator.load_state_dict(checkpoint_generator['generator'])
transform_train = transforms.Compose([ transforms.RandomHorizontalFlip(), transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), ]) # Data-loader of testing set transform_val = transforms.Compose([ transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), ]) train_dataset = get_data_loader(DATA_INFO.DATA_LOADER, DATA_INFO.TRAIN_DIR, transform_train, DATA_INFO.NUM_CLASSES, "train", opt.MODEL.IMAGE_SIZE) val_dataset = get_data_loader(DATA_INFO.DATA_LOADER, DATA_INFO.VAL_DIR, transform_val, DATA_INFO.NUM_CLASSES, "val", opt.MODEL.IMAGE_SIZE) train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=opt.TRAIN.BATCH_SIZE, shuffle=opt.TRAIN.SHUFFLE, num_workers=opt.TRAIN.WORKERS) test_loader = torch.utils.data.DataLoader(val_dataset, batch_size=opt.VALID.BATCH_SIZE, shuffle=False, num_workers=opt.TRAIN.WORKERS)
# NOTE(review): the loop below is the tail of a validation routine whose
# definition lies outside this view; indentation reconstructed.
for inp, trg in dl:
    inp = inp.to(device)
    trg = trg.to(device)
    # NOTE(review): "forwar_and_loss" looks like a typo for
    # "forward_and_loss", but it must match the Model method name — verify
    # against the Model class before renaming.
    loss = model.forwar_and_loss(inp, trg)
    pbar.update(1)
    pbar.set_description('val_loss = %.6f' % loss.item())


if __name__ == '__main__':
    args = get_args()
    # Tokenizer was pickled once and is reused across runs.
    with open(args.token_file, 'rb') as f:
        tokenizer = pickle.load(f)
    train_dl = get_data_loader(tokenizer, args.train_file, args.batch_size)
    val_dl = get_data_loader(tokenizer, args.valid_file, args.batch_size)
    # NOTE(review): +1 presumably reserves index 0 (padding) — confirm.
    vocab_size = len(tokenizer.word_index) + 1
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = Model(vocab_size, vocab_size, args.emb_size, args.hidden_size)
    model = model.to(device)
    if args.load_weight is not None:
        print("Load weight from %s" % args.load_weight)
        w = torch.load(args.load_weight)
        model.load_state_dict(w)
    optim = torch.optim.Adam(model.parameters(), lr=args.lr)
    # NOTE(review): this `if` is cut off at the edge of the visible chunk.
    if not os.path.isdir(args.weight_dir):
# NOTE(review): mid-script section. The statements below appear to be the
# tail of an interpolation-sampling routine (its loop header is not
# visible), followed by loader setup and the start of a VID distillation
# module whose __init__ is cut off at the chunk boundary.
generator_sample = generator_sample.mul(0.5).add(0.5)
images.append(generator_sample)
for i in range(imgs.shape[0]):
    generator_sample = [images[j].data[i] for j in range(number_int)]
    #generator_imgs = make_grid(generator_sample, nrow=number_int, normalize=True, scale_each=True)
    generator_imgs = make_grid(generator_sample, nrow=number_int)
    # NOTE(review): `j` here is the leftover value from the comprehension-era
    # loop variable scope — confirm which index the filename should carry.
    save_image(generator_imgs, '%s/interpolate_%d_%d.png' % (img_dir, i, j))
    writer.add_image('interpolat/%d_%d' % (i, j), generator_imgs, global_step=step + 1)


##########################################################################
# Configure data loader
dataloader, test_loader = get_data_loader(opt)
##########################################################################
# Loss weight for gradient penalty
lambda_gp = 10
lambda_distil = opt.lambda_distil


class VID(nn.Module):
    def __init__(self, criterion, thin_factor=2):
        super(VID, self).__init__()
        # Teacher/student discriminators; student is `thin_factor`x narrower.
        self.teacher = Discriminator(nh=DIM)
        self.student = Discriminator(nh=DIM // thin_factor)
        # (t, s, tc, sc)
# --- BERT fine-tuning data preparation (top-level script section) ---
# load dataset
dataset = pd.read_csv('../dataset/train.csv')  # path to dataset
xData = dataset.text
xData = xData.map(lambda tweet: preProcess.preProc(tweet, REMOVE_STOPWORDS))
yData = dataset.target

from sklearn.model_selection import train_test_split
# Stratified 80/20 split keeps the class balance in both folds.
X_train, X_valid, y_train, y_valid = train_test_split(
    xData.values, yData.values, test_size=0.2, shuffle=True,
    stratify=yData.values, random_state=42)

train_data_loader = data_loader.get_data_loader(X_train, y_train, BERT_Tokenizer, MAX_LEN, BATCH_SIZE)
valid_data_loader = data_loader.get_data_loader(X_valid, y_valid, BERT_Tokenizer, MAX_LEN, BATCH_SIZE)

BERT_Model = transformers.BertModel.from_pretrained(BERT_Model_str)
network = model.Network(BERT_Model, DROPOUT).to(device)

# Freeze parameters whose names match any TO_FREEZE entry; train the rest.
# Fixed: the comprehension variable `i` shadowed the (unused) enumerate
# index, and the list wrapper inside any() was unnecessary.
to_be_trained = []
for name, param in network.named_parameters():
    if any(key in name for key in TO_FREEZE):
        param.requires_grad = False
    else:
        to_be_trained.append(param)
def main(args):
    """Train the patch CNN: build the loader from *args*, optimize with
    Adam or RMSprop, step-decay the LR every `decay_epoch` epochs,
    checkpoint periodically, and plot the per-epoch loss curve at the end.
    """
    print(sys.argv)
    if not os.path.exists('models'):
        os.mkdir('models')
    num_epochs = args.ne
    lr_decay = args.decay
    learning_rate = args.lr
    data_loader = get_data_loader(args.patches_path, args.gt_path, args.bs, num_workers=8)
    model = CNN()
    if torch.cuda.is_available():
        model.cuda()
    model.train()
    if args.rms:
        optimizer = optim.RMSprop(model.parameters(), lr=args.lr, momentum=args.mm)
    else:
        optimizer = optim.Adam(model.parameters(), lr=args.lr)
    model_loss = torch.nn.CrossEntropyLoss()
    losses = []
    try:
        for epoch in range(num_epochs):
            # Step-decay the learning rate in-place on the optimizer.
            if epoch % args.decay_epoch == 0 and epoch > 0:
                learning_rate = learning_rate * lr_decay
                for param_group in optimizer.param_groups:
                    param_group['lr'] = learning_rate
            loss_epoch = []
            for step, (patches, gt) in enumerate(data_loader):
                if torch.cuda.is_available():
                    patches = patches.cuda()
                    gt = gt.cuda()
                model.zero_grad()
                out = model(patches)
                loss = model_loss(out, gt)
                loss.backward()
                optimizer.step()
                loss_step = loss.cpu().detach().numpy()
                loss_epoch.append(loss_step)
                print('Epoch ' + str(epoch + 1) + '/' + str(num_epochs) + ' - Step ' + str(step + 1) + '/' + str(len(data_loader)) + " - Loss: " + str(loss_step))
            loss_epoch_mean = np.mean(np.array(loss_epoch))
            losses.append(loss_epoch_mean)
            print('Total epoch loss: ' + str(loss_epoch_mean))
            # Periodic checkpoint.
            if (epoch + 1) % args.save_epoch == 0 and epoch > 0:
                filename = 'model-epoch-' + str(epoch + 1) + '.pth'
                model_path = os.path.join('models/', filename)
                torch.save(model.state_dict(), model_path)
    except KeyboardInterrupt:
        # Ctrl-C falls through to save a final checkpoint below.
        pass
    filename = 'model-epoch-last.pth'
    model_path = os.path.join('models', filename)
    torch.save(model.state_dict(), model_path)
    plt.plot(losses)
    plt.show()
# log hyperparameters to Weights & Biases wandb.config.update(args) experiment_name, output_path = create_experiment_folder( args.model_output_dir, args.exp_name) print("Run experiment '{}'".format(experiment_name)) write_config_to_file(args, output_path) device, n_gpu = setup_device() set_seed_everywhere(args.seed, n_gpu) sql_data, table_data, val_sql_data, val_table_data = spider_utils.load_dataset( args.data_dir, use_small=args.toy) train_loader, dev_loader = get_data_loader(sql_data, val_sql_data, args.batch_size, True, False) grammar = semQL.Grammar() model = IRNet(args, device, grammar) model.to(device) # track the model wandb.watch(model, log='parameters') num_train_steps = len(train_loader) * args.num_epochs optimizer, scheduler = build_optimizer_encoder(model, num_train_steps, args.lr_transformer, args.lr_connection, args.lr_base, args.scheduler_gamma)