rnn_type=args.rnn_type, ksize=args.ksize, n_level=args.n_level, n=args.n,
           dropout=dropout, emb_dropout=emb_dropout, tied_weights=tied,
           butterfly=args.butterfly)  # tail of the model constructor call begun above this chunk
model.to(device)

# Encode the run's hyperparameters into a name shared by the result log and checkpoint.
model_name = "d_{}_h_{}_type_{}_ks_{}_level_{}_n_{}_lr_{}_drop_{}_butterfly_{}".format(
    args.d_model, args.h, args.rnn_type, args.ksize, args.n_level, args.n,
    args.lr, args.dropout, args.butterfly)

# Count trainable parameters and print each one's shape/size for inspection.
count_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad)
for name, param in model.named_parameters():
    if param.requires_grad:
        print(name, param.data.size(), param.data.numel())
print("Number of parameters = {}".format(count_parameters))

# Output paths: 'r_' prefix = text result log, 'm_' prefix = saved model.
message_filename = s_dir + 'r_' + model_name + '.txt'
model_filename = s_dir + 'm_' + model_name + '.pt'
# Truncate any previous log and record the parameter count up front.
with open(message_filename, 'w') as out:
    out.write('start\ntrainable parameters={}\n'.format(count_parameters))

# May use adaptive softmax to speed up training
criterion = nn.CrossEntropyLoss()
lr = args.lr
cuda=args.cuda)  # tail of the model constructor call begun above this chunk
# Move the model to the device only when CUDA was requested.
if args.cuda:
    model.to(device)

# Encode the run's hyperparameters into a name shared by the result log and checkpoint.
model_name = "data_{}_d_{}_h_{}_type_{}_k_{}_level_{}_n_{}_lr_{}_drop_{}".format(
    args.data, args.d_model, args.h, args.rnn_type, args.ksize, args.n_level,
    args.n, args.lr, args.dropout)
# Output paths: 'r_' prefix = text result log, 'm_' prefix = saved model.
message_filename = s_dir + 'r_' + model_name + '.txt'
model_filename = s_dir + 'm_' + model_name + '.pt'
# Truncate any previous log for this configuration.
with open(message_filename, 'w') as out:
    out.write('start\n')

criterion = nn.CrossEntropyLoss()
lr = args.lr
# Optimizer class is selected by name from torch.optim (e.g. 'Adam', 'SGD').
optimizer = getattr(optim, args.optim)(model.parameters(), lr=lr)


def save(save_model, save_filename):
    """Serialize the entire model object to save_filename via torch.save."""
    with open(save_filename, "wb") as f:
        torch.save(save_model, f)
        #print('Saved as %s' % save_model)


def output_s(output_message, save_filename):
    """Print output_message and append it as one line to save_filename."""
    print(output_message)
    with open(save_filename, 'a') as file:
        file.write(output_message + '\n')


def evaluate(X_data, name='Eval'):  # body continues beyond this chunk
# Presumably per-sample/per-class loss weights derived from the data splits —
# TODO confirm against where *_weights are consumed.
v_weights = get_valid_weights(valid_indices, valid_path)
t_weights = get_train_weights(train_path)

'''Transformer'''
# NOTE(review): input_size=190 and output_size=10 look dataset-specific — confirm.
model = RT(input_size=190, d_model=args.dim, output_size=10, h=args.heads,
           rnn_type='RNN', ksize=args.ksize, n=args.rnn, n_level=args.levels,
           dropout=args.drop).to(args.device)

# reduction='none' keeps element-wise BCE losses (presumably so the weights
# above can be applied before averaging — confirm at the training loop).
criterion = nn.BCELoss(reduction='none')
optimizer = optim.Adam(model.parameters(), lr=args.lr, betas=(args.b1, args.b2),
                       weight_decay=args.l2)
# Cut the learning rate by 10x after the monitored metric plateaus for 10 epochs.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=10,
                                                 factor=0.1, verbose=False)

# loss tracker
train_losses = np.zeros(args.epochs)
val_losses = np.zeros(args.epochs)
# accuracies tracker (one column per output; width 10 matches output_size above)
train_acc = np.zeros((args.epochs, 10))
val_acc = np.zeros((args.epochs, 10))
# frequency tracker