# --- pixel-level sequence setup: derive image size from the dataset name ---
# e.g. a train-dir name ending in "...train28" yields 28x28 images.
num_pixels = int(args.train.split('train')[1])
input_size = num_pixels**2  # 28 * 28

# X_train = get_data(train_dir, num_pixels, num_seq_train, max_seq_len)
# X_test = get_data(test_dir, num_pixels, num_seq_test, max_seq_len)
X_train = get_data_list(train_dir, num_pixels, max_seq_len)
X_test = get_data_list(test_dir, num_pixels, max_seq_len)

dropout = args.dropout
emb_dropout = args.dropout

# Autoencoding-style model: input and output dimensionality are both input_size.
model = RT(input_size, args.d_model, input_size, h=args.h,
           rnn_type=args.rnn_type, ksize=args.ksize, n_level=args.n_level,
           n=args.n, dropout=dropout, emb_dropout=emb_dropout, cuda=args.cuda)
if args.cuda:
    model.to(device)

# Encode every hyperparameter into the run name so output files are unique.
model_name = "data_{}_d_{}_h_{}_type_{}_k_{}_level_{}_n_{}_lr_{}_drop_{}".format(
    args.data, args.d_model, args.h, args.rnn_type, args.ksize,
    args.n_level, args.n, args.lr, args.dropout)
message_filename = s_dir + 'r_' + model_name + '.txt'
model_filename = s_dir + 'm_' + model_name + '.pt'
with open(message_filename, 'w') as out:
    # NOTE(review): the `with` body was truncated in the extracted source;
    # reconstructed to match the sibling scripts — confirm against the original.
    out.write('start\n')
# --- word-level language-model setup: corpus, batching, model, run files ---
corpus = data_generator(data_dir, args)
eval_batch_size = 10
train_data = batchify(corpus.train, args.batch_size, args)
val_data = batchify(corpus.valid, eval_batch_size, args)
test_data = batchify(corpus.test, eval_batch_size, args)
n_words = len(corpus.dictionary)

dropout = args.dropout
emb_dropout = args.emb_dropout
tied = args.tied
model = RT(args.d_model, n_words, h=args.h, rnn_type=args.rnn_type,
           ksize=args.ksize, n_level=args.n_level, n=args.n,
           dropout=dropout, emb_dropout=emb_dropout, tied_weights=tied)
model.to(device)

# Run name encodes every hyperparameter so result/model files are unique.
model_name = "d_{}_h_{}_type_{}_ks_{}_level_{}_n_{}_lr_{}_drop_{}".format(
    args.d_model, args.h, args.rnn_type, args.ksize, args.n_level,
    args.n, args.lr, args.dropout)
message_filename = s_dir + 'r_' + model_name + '.txt'
model_filename = s_dir + 'm_' + model_name + '.pt'
with open(message_filename, 'w') as out:
    out.write('start\n')

# May use adaptive softmax to speed up training
# --- load a trained checkpoint and run a single batch for saliency analysis ---
indices = list(range(33715))
path = train_path
data = Dataset(indices, path)
loader = DataLoader(data, batch_size=1, shuffle=False, collate_fn=collate_fn)

folder = '/home/osvald/Projects/Diagnostics/github/models/Transformer/CV4/IS/dim64_heads4_levels4/lr0.000848_b1_0.800277_b2_0.959436_drop0.212179_l2_0.003331'
model = RT(input_size=190, d_model=64, output_size=10, h=4, rnn_type='RNN',
           ksize=3, n=1, n_level=4, dropout=0).to(args.device)
model.load_state_dict(torch.load(folder + '/best_auc_model'))

# NOTE(review): train() before inference is unusual — the original author noted
# that eval mode raises an error here (possibly the cuDNN RNN backward-in-eval
# restriction). dropout=0, so train mode should not change the forward pass;
# confirm before changing.
model.train()

with torch.no_grad():
    for batch, labels, seq_len in loader:
        # pass to GPU if available
        batch, labels = batch.to(args.device), labels.to(args.device)
        out = model(batch)
        break  # only the first batch is needed

heatmap = VanillaSaliency(model)
# --- fixed-width sequence task setup: data paths, splits, model, param count ---
data_dir = os.path.join(base_path, 'data/')
s_dir = os.path.join(base_path, 'output/')
print(args)

input_size = 88  # fixed per-timestep feature width for this dataset
X_train, X_valid, X_test = data_generator(args.data, data_dir)

dropout = args.dropout
emb_dropout = args.dropout
# Input and output widths match: the model predicts the next timestep's features.
model = RT(input_size, args.d_model, input_size, h=args.h,
           rnn_type=args.rnn_type, ksize=args.ksize, n_level=args.n_level,
           n=args.n, dropout=dropout, emb_dropout=emb_dropout,
           butterfly=args.butterfly)
model.to(device)

model_name = "data_{}_d_{}_h_{}_type_{}_k_{}_level_{}_n_{}_lr_{}_drop_{}_butterfly_{}".format(
    args.data, args.d_model, args.h, args.rnn_type, args.ksize,
    args.n_level, args.n, args.lr, args.dropout, args.butterfly)

# Count only trainable parameters.
count_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad)
print("Number of parameters = {}".format(count_parameters))
# --- language-model setup (butterfly variant): batching, model, param count ---
train_data = batchify(corpus.train, args.batch_size, args)
val_data = batchify(corpus.valid, eval_batch_size, args)
test_data = batchify(corpus.test, eval_batch_size, args)
n_words = len(corpus.dictionary)

dropout = args.dropout
emb_dropout = args.emb_dropout
tied = args.tied
model = RT(args.d_model, n_words, h=args.h, rnn_type=args.rnn_type,
           ksize=args.ksize, n_level=args.n_level, n=args.n, dropout=dropout,
           emb_dropout=emb_dropout, tied_weights=tied, butterfly=args.butterfly)
model.to(device)

model_name = "d_{}_h_{}_type_{}_ks_{}_level_{}_n_{}_lr_{}_drop_{}_butterfly_{}".format(
    args.d_model, args.h, args.rnn_type, args.ksize, args.n_level,
    args.n, args.lr, args.dropout, args.butterfly)
count_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad)

for name, param in model.named_parameters():
    pass  # TODO(review): loop body truncated in the extracted source — restore from original
test_data = batchify(char_tensor(corpus, testfile), 1) torch.save(test_data, data_dir + 'test_data.pt') print("Corpus size: ", n_characters) else: train_data = torch.load(data_dir + 'train_data.pt') val_data = torch.load(data_dir + 'val_data.pt') test_data = torch.load(data_dir + 'test_data.pt') n_characters = 49 train_data.to(device) val_data.to(device) test_data.to(device) print (args) dropout = args.dropout emb_dropout = dropout model = RT(args.d_model, n_characters, h=args.h, n=args.n, rnn_type=args.rnn_type, ksize=args.ksize, n_level=args.n_level, dropout=dropout, emb_dropout=emb_dropout) model_name = "data_{}_d_{}_h_{}_type_{}_ksize_{}_level_{}_n_{}_lr_{}_dropout_{}".format( args.dataset, args.d_model, args.h, args.rnn_type, args.ksize, args.n_level, args.n, args.lr, args.dropout) message_filename = s_dir + 'r_' + model_name + '.txt' model_filename = s_dir + 'm_' + model_name + '.pt' with open(message_filename, 'w') as out: out.write('start\n') model.to(device) criterion = nn.CrossEntropyLoss() lr = args.lr optimizer = getattr(optim, args.optim)(model.parameters(), lr=lr)
model_name = arch_name + '/' + opt_name

# --- training data setup ---
# Validation-fold sizes differ slightly by cross-validation split.
if args.cv in (1, 2, 3):
    valid_indices = list(range(4214))
elif args.cv == 4:
    valid_indices = list(range(4215))
else:
    valid_indices = list(range(4213))
v_weights = get_valid_weights(valid_indices, valid_path)
t_weights = get_train_weights(train_path)

# --- Transformer ---
model = RT(input_size=190, d_model=args.dim, output_size=10, h=args.heads,
           rnn_type='RNN', ksize=args.ksize, n=args.rnn,
           n_level=args.levels, dropout=args.drop).to(args.device)
# Per-element BCE so sample weights can be applied before reduction.
criterion = nn.BCELoss(reduction='none')
optimizer = optim.Adam(model.parameters(), lr=args.lr,
                       betas=(args.b1, args.b2), weight_decay=args.l2)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=10,
                                                 factor=0.1, verbose=False)
# --- sequential-MNIST setup: loaders, model, run files ---
batch_size = args.batch_size
n_classes = 10
input_channels = 1
seq_length = int(784 / input_channels)  # 28x28 image flattened to a sequence
epochs = args.epochs
steps = 0
print(args)

train_loader, test_loader = data_generator(root, batch_size)
model = RT(input_channels, args.d_model, n_classes, h=args.h,
           rnn_type=args.rnn_type, ksize=args.ksize, n_level=args.n_level,
           n=args.n, dropout=args.dropout, emb_dropout=args.dropout)
model.to(device)

# Run name encodes every hyperparameter so result/model files are unique.
model_name = "d_{}_h_{}_t_{}_ksize_{}_level_{}_n_{}_lr_{}_dropout_{}".format(
    args.d_model, args.h, args.rnn_type, args.ksize, args.n_level,
    args.n, args.lr, args.dropout)
message_filename = s_dir + 'r_' + model_name + '.txt'
model_filename = s_dir + 'm_' + model_name + '.pt'
with open(message_filename, 'w') as out:
    out.write('start\n')
# --- text-classification setup: corpus stats, model with pretrained embeddings ---
# NOTE(review): the first statement references loop index `i` — the enclosing
# loop header is cut off above this chunk.
test_Y[i] = test_Y[i].to(device)

n_words = len(corpus.dictionary)
n_categories = len(corpus.categories)
n_words_test = sum(s.size()[0] for s in test_X)

dropout = args.dropout
emb_dropout = args.emb_dropout
tied = args.tied
model = RT(n_words, args.d_model, n_categories, h=args.h,
           rnn_type=args.rnn_type, ksize=args.ksize, n_level=args.n_level,
           n=args.n, dropout=dropout, emb_dropout=emb_dropout,
           cuda=args.cuda, emb_weights=emb_weights)
if args.cuda:
    # noinspection PyUnresolvedReferences
    model.to(device)

# Run name encodes every hyperparameter so result/model files are unique.
model_name = "d_{}_h_{}_type_{}_ks_{}_level_{}_n_{}_lr_{}_drop_{}".format(
    args.d_model, args.h, args.rnn_type, args.ksize, args.n_level,
    args.n, args.lr, args.dropout)
message_filename = s_dir + 'r_' + model_name + '.txt'
model_filename = s_dir + 'm_' + model_name + '.pt'
# --- classifier setup with tied embeddings: model, run files ---
batch_size = args.batch_size
n_classes = 10
epochs = args.epochs
steps = 0

dropout = args.dropout
emb_dropout = args.emb_dropout
tied = args.tied
# CONSISTENCY FIX: the original assigned the three locals above and then
# ignored them, passing args.dropout / args.dropout / args.tied directly
# (note emb_dropout was silently args.dropout, not args.emb_dropout).
# Use the locals, matching every sibling setup script in this file.
model = RT(args.d_model, n_words, n_classes, h=args.h, rnn_type=args.rnn_type,
           ksize=args.ksize, n_level=args.n_level, n=args.n,
           dropout=dropout, emb_dropout=emb_dropout,
           tied_weights=tied, cuda=args.cuda, max_len=max_seq_length)
if args.cuda:
    model.to(device)

# Run name encodes every hyperparameter so result/model files are unique.
model_name = "d_{}_h_{}_t_{}_ksize_{}_level_{}_n_{}_lr_{}_dropout_{}".format(
    args.d_model, args.h, args.rnn_type, args.ksize, args.n_level,
    args.n, args.lr, args.dropout)
message_filename = s_dir + 'r_' + model_name + '.txt'
model_filename = s_dir + 'm_' + model_name + '.pt'