def apply(self, inp, hidden=None, cell=None):
    bsize = tf.shape(inp)[0]
    zero_state = tf.constant(0., shape=[1, self.dim])
    zero_state = tf.tile(zero_state, [bsize, 1])
    if hidden is None:
        hidden = zero_state
    if cell is None:
        cell = zero_state
    out = M.LSTM(inp, hidden, cell, self.dim, self.name, self.reuse)
    self.reuse = True
    return out
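# A minimal usage sketch for apply() above (assumptions: the method lives on
# a small cell class holding `dim`, `name`, and a `reuse` flag, and M.LSTM
# returns an (output, cell) pair, as in the unrolled example near the end of
# this section). The class name `Cell` is hypothetical.
import tensorflow as tf
import M  # the helper module used throughout these snippets

class Cell:
    """Hypothetical host class for the apply() method defined above."""
    def __init__(self, dim, name):
        self.dim, self.name, self.reuse = dim, name, False
    apply = apply  # bind the function defined above as a method

x = tf.placeholder(tf.float32, [None, 6, 8])  # [batch, time, features]
cell = Cell(dim=10, name='LSTM1')

hidden = cell_state = None
for step in tf.unstack(x, axis=1):  # unroll over time
    # the first call substitutes zero states; later calls thread state through
    hidden, cell_state = cell.apply(step, hidden, cell_state)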
def main(lstm_units, epochs):
    print('Generating data...')
    data1, labels1, lengths1 = data.binary_equal_length(
        dataset_size, max_sequence_length)
    print('Building model...')
    LSTM = model.LSTM(lstm_units)
    print('{} unit LSTM built'.format(lstm_units))
    print('Training...')
    model.train_model(LSTM, data1, labels1, lengths1, epochs=epochs)
    return LSTM
def main(): print("\nParameters:") for attr, value in args.__dict__.items(): print("\t{}={}".format(attr.upper(), value)) # load data strain_data, sd_train_data, sdev_data, stest_data, embeddings =\ data_utils.load_dataset(args, 'askubuntu-master', dtrain=True) dtrain_data, ddev_data, dtest_data, _ =\ data_utils.load_dataset(args, 'Android-master') # initalize necessary parameters args.embed_num = embeddings.shape[0] args.kernel_sizes = [int(k) for k in args.kernel_sizes.split(',')] # load model if args.snapshot is None: # initalize model task_model = None if args.model == 'lstm': if args.bidirectional and (args.hidden_layer > 1): args.hidden_layer = 1 print('\nMultilayer bidirectional LSTM not supported yet,\ layer set to 1.\n') task_model = model.LSTM(args, embeddings) elif args.model == 'cnn': task_model = model.CNN(args, embeddings) domain_model = model.DomainClassifier(args, embeddings) # train models res = train2.train_model(strain_data, sd_train_data, sdev_data, stest_data, dtrain_data, ddev_data, dtest_data, task_model, domain_model, args) else: print('\nLoading model from [%s]...' % args.snapshot) try: mod = torch.load(args.snapshot) except: print("Sorry, This snapshot doesn't exist.") exit() print(mod) # evaluate print('\nEvaluating on target dev') evaluate.q_evaluate(mod, ddev_data, args) print('Evaluating on target test') evaluate.q_evaluate(mod, dtest_data, args)
def train(self, dataX, dataY):
    data = dataset.OnlineDeciderDataSet(seq_len=self.config.SEQ_LEN,
                                        raw_dataset_x=dataX,
                                        raw_dataset_y=dataY)
    self.model_lstm = model.LSTM(input_size=self.config.INPUT_SIZE,
                                 seq_length=self.config.SEQ_LEN,
                                 num_layers=2,
                                 out_size=self.config.OUTPUT_SIZE,
                                 hidden_size=5,
                                 batch_size=self.config.TRAIN_BATCH_SIZE,
                                 device=self.config.DEVICE)
    self.experiment = Experiment(config=self.config,
                                 model=self.model_lstm,
                                 dataset=data)
    self.experiment.run()
def train(self, dataN):
    classDatas = np.squeeze(dataN, axis=1)
    # classDatas = np.divide(classDatas, 100)
    data = dataset.OnlineLearningFinancialData(seq_len=self.config.SEQ_LEN,
                                               data=classDatas,
                                               categoricalN=5)
    self.model_lstm = model.LSTM(input_size=self.config.INPUT_SIZE,
                                 seq_length=self.config.SEQ_LEN,
                                 num_layers=2,
                                 out_size=self.config.OUTPUT_SIZE,
                                 hidden_size=5,
                                 batch_size=self.config.TRAIN_BATCH_SIZE,
                                 device=self.config.DEVICE)
    self.experiment = Experiment(config=self.config,
                                 model=self.model_lstm,
                                 dataset=data)
    self.experiment.run()
def main(): print("\nParameters:") for attr, value in args.__dict__.items(): print("\t{}={}".format(attr.upper(), value)) # load data train_data, embeddings = data_utils.load_dataset() # initalize necessary parameters args.embed_num = embeddings.shape[0] args.kernel_sizes = [int(k) for k in args.kernel_sizes.split(',')] # choose model args.model = 'lstm' # args.model = 'cnn' # load model if args.model == 'lstm': mod = model.LSTM(args, embeddings) else: mod = model.CNN(args, embeddings) # train model res = train.train_model(train_data, mod, args)
def main():
    # Hyperparameters
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    parser.add_argument('--batch-size', type=int, default=128, metavar='N',
                        help='input batch size for training (default: 128)')
    parser.add_argument('--test-batch-size', type=int, default=500, metavar='N',
                        help='input batch size for testing (default: 500)')
    parser.add_argument('--epochs', type=int, default=10, metavar='N',
                        help='number of epochs to train (default: 10)')
    parser.add_argument('--lr', type=float, default=0.0001, metavar='LR',
                        help='learning rate (default: 0.0001)')
    parser.add_argument('--lrlist', default=[0.001, 0.0001],
                        help='list of rates for adjusting the learning rate')
    parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
                        help='SGD momentum (default: 0.5)')
    parser.add_argument('--weight-decay', type=float, default=0.00000001,
                        help='weight decay parameter')

    # Data sampling method
    parser.add_argument('--new_version', default=True, type=bool,
                        help='True: longer input length; '
                             'False: input length + prediction length = 31')
    parser.add_argument('--random-length', default=[False, True], type=bool,
                        help='set True to use randomized sample lengths')
    parser.add_argument('--before_lim', default=120, type=int,
                        help="initial input length of a batch")
    parser.add_argument('--after_lim', default=30, type=int,
                        help="initial prediction length of a batch")

    # Mode configuration
    parser.add_argument('--test', default=False,
                        help='use for testing (trained model)')
    parser.add_argument('--save-model', action='store_true', default=True,
                        help='for saving the current model')
    parser.add_argument('--name', type=str, default='LSTM_20_2')
    parser.add_argument('--log-pass', type=int, default=10000)
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed', type=int, default=2, metavar='S',
                        help='random seed (default: 2)')
    parser.add_argument('--log-interval', type=int, default=500, metavar='N',
                        help='how many batches to wait before logging training status')
    parser.add_argument('--isweather', default=False,
                        help='whether to use weather information')
    parser.add_argument('--isMSEweighted', default=False,
                        help='whether to use weighted MSE')

    # Paths
    # parser.add_argument('--resume',
    #                     default='./model/LSTM_20_1/76001checkpoint_lstm_std.pth.tar',
    #                     type=str, metavar='PATH',
    #                     help='path to latest checkpoint (default: none)')
    parser.add_argument('--resume', default='', type=str)
    parser.add_argument("--data-root",
                        default="./data/03_merge/v06_divide_train_test/")
    parser.add_argument("--normalize-factor-path", type=str,
                        default='./data/etc/normalize_factor.csv')
    parser.add_argument("--usable-idx-path", type=str, default="./data/etc/")

    # CUDA configuration
    args = parser.parse_args()
    use_cuda = not args.no_cuda and torch.cuda.is_available()
    torch.manual_seed(args.seed)
    device = torch.device("cuda" if use_cuda else "cpu")
    cudnn.benchmark = True
    cudnn.deterministic = True
    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

    # Load normalization factors
    fr = open(args.normalize_factor_path, 'r', encoding='cp949', newline='')
    normalize_factor = list(csv.reader(fr))

    if args.test:
        train_loss_dataset = dataset.CustomDataset(args, args.data_root,
                                                   "v05_trainset.csv",
                                                   "idx_train_loss.csv",
                                                   for_test=True)
        train_loss_sampler = sampler.BatchSampler(
            sampler.SequentialSampler(train_loss_dataset),
            batch_size=2000,
            drop_last=False,
            random_length=args.random_length,
            for_test=True,
            before_lim=args.before_lim,
            after_lim=args.after_lim,
            new_version_sampler=args.new_version)
        train_loss_loader = torch.utils.data.DataLoader(
            train_loss_dataset, batch_sampler=train_loss_sampler, **kwargs)

        test_dataset = dataset.CustomDataset(args, args.data_root,
                                             "v05_testset.csv",
                                             "idx_test.csv", for_test=True)
        test_sampler = sampler.BatchSampler(
            sampler.SequentialSampler(test_dataset),
            batch_size=2000,
            drop_last=False,
            random_length=args.random_length,
            for_test=True,
            before_lim=args.before_lim,
            after_lim=args.after_lim,
            new_version_sampler=args.new_version)
        test_loader = torch.utils.data.DataLoader(
            test_dataset, batch_sampler=test_sampler, **kwargs)
    else:
        train_dataset = dataset.CustomDataset(args, args.data_root,
                                              "v05_trainset.csv",
                                              "idx_train.csv")
        # BatchSampler parameters: (sampler, batch_size, drop_last,
        #   random_length=False, for_test=False, new_type_sampler=True,
        #   before_lim=-1, after_lim=-1)
        train_sampler = sampler.BatchSampler(
            sampler.RandomSampler(train_dataset),
            batch_size=args.batch_size,
            drop_last=False,
            random_length=args.random_length,
            before_lim=args.before_lim,
            after_lim=args.after_lim,
            new_version_sampler=args.new_version)
        train_loader = torch.utils.data.DataLoader(
            train_dataset, batch_sampler=train_sampler, **kwargs)

        train_loss_dataset = dataset.CustomDataset(args, args.data_root,
                                                   "v05_trainset.csv",
                                                   "idx_train_loss.csv",
                                                   for_test=True)
        train_loss_sampler = sampler.BatchSampler(
            sampler.RandomSampler(train_loss_dataset),
            batch_size=args.test_batch_size,
            drop_last=False,
            random_length=args.random_length,
            for_test=True,
            before_lim=args.before_lim,
            after_lim=args.after_lim,
            new_version_sampler=args.new_version)
        train_loss_loader = torch.utils.data.DataLoader(
            train_loss_dataset, batch_sampler=train_loss_sampler, **kwargs)

        test_dataset = dataset.CustomDataset(args, args.data_root,
                                             "v05_testset.csv",
                                             "idx_test.csv", for_test=True)
        test_sampler = sampler.BatchSampler(
            sampler.RandomSampler(test_dataset),
            batch_size=args.test_batch_size,
            drop_last=False,
            random_length=args.random_length,
            for_test=True,
            before_lim=args.before_lim,
            after_lim=args.after_lim,
            new_version_sampler=args.new_version)
        test_loader = torch.utils.data.DataLoader(
            test_dataset, batch_sampler=test_sampler, **kwargs)

    # Training configuration
    writer = SummaryWriter('./log/' + args.name + '/')
    print(device)
    model = Model.LSTM(args).to(device)
    if args.isMSEweighted:
        criterion = weighted_mse_loss
    else:
        criterion = nn.MSELoss().to(device)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    training = Train.Training(model)
    steps = 0
    best_loss = 1000000
    start_epoch = 1

    # Load a trained checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
            steps = checkpoint['steps']
            best_loss = checkpoint['best_loss']
            start_epoch = checkpoint['epoch']
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    if not args.test:  # training
        for param_group in optimizer.param_groups:
            param_group['lr'] = args.lr
        for epoch in range(start_epoch, args.epochs):
            print(epoch)
            adjust_learning_rate(optimizer, steps, args)
            steps, best_loss = training.train(args, model, criterion, device,
                                              train_loader, test_loader,
                                              train_loss_loader, optimizer,
                                              epoch, writer, normalize_factor,
                                              steps, best_loss)
    else:  # evaluation
        print("activate test code!!!!")
        args.random_length[0] = False
        test_mse, test_mae, test_won_sum_mae, test_won_mae, \
            test_mae_list, test_var = training.eval(args, test_loader, model,
                                                    criterion, device,
                                                    normalize_factor,
                                                    teacher_forcing=False)
        writer.add_scalars('Loss_test', {'test loss': test_mae}, 1)
        writer.add_scalars('Won mae_test',
                           {'test_won_mae': test_won_sum_mae}, 1)
        writer.add_scalars('energy_prediction_mae_test',
                           {'elec': test_won_mae[0],
                            'water': test_won_mae[1],
                            'gas': test_won_mae[2]}, 1)
        print("mae_won={}".format(test_won_sum_mae))
        for i in range(30):
            # tag translates to "error by prediction length (test)"
            writer.add_scalars('예측 길이에 따른 오차_test',
                               {'test won': test_mae_list[i]}, i + 1)
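# The adjust_learning_rate() called in the training loop is not shown in this
# snippet. A minimal sketch, assuming it steps through args.lrlist at fixed
# step milestones; both the milestones and the policy are guesses, not the
# original implementation.
def adjust_learning_rate(optimizer, steps, args, milestones=(50000, 100000)):
    lr = args.lr
    for candidate_lr, milestone in zip(args.lrlist, milestones):
        if steps >= milestone:
            lr = candidate_lr  # switch rates once a milestone is passed
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr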
def train(epoch_num):
    path = "result.csv"
    df = pd.read_csv(path, dtype='object')
    mt = preprocessing.Mecab_neologd()
    m = model.LSTM(100, 128, 2)
    ohayou_words = ["おはよう", "起床", "起きた"]
    oyasumi_words = ["おやすみ", "寝よう", "寝る"]
    greetings = ohayou_words + oyasumi_words
    criterion = nn.CrossEntropyLoss()
    # binary classification, but since the reward is discrete, squared error is used
    criterion2 = nn.MSELoss()
    word2vec = preprocessing.Word2vec()
    count = 0
    correct = 0
    x, y = df.shape
    LOG_FREQ = x
    for epoch in range(epoch_num):
        for i in range(df.shape[0]):
            begin_time = time.time()
            text = df.loc[i]["text"]
            ignore = False
            for greeting in greetings:
                if greeting in text:
                    ignore = True
                else:
                    text_normalized = mt.normalize_neologd(text)
                    wakati_texts = mt.m.parse(text_normalized).split(" ")
            if not ignore:
                inputs = np.array([])
                for word in wakati_texts:
                    try:
                        vec = word2vec.transform(word)
                        vec = np.reshape(vec, (1, 1, -1))
                        if len(inputs) == 0:
                            inputs = vec
                        else:
                            inputs = np.concatenate([inputs, vec])
                    except:
                        # skip words missing from the word2vec vocabulary
                        pass
                inputs_ = torch.from_numpy(inputs)
                if inputs_.dim() <= 2:
                    pass  # too short to evaluate
                else:
                    output = m(inputs_)
                    predict = torch.argmax(output)
                    if (int(df.loc[i]["reply_favorited_count"])
                            + int(df.loc[i]["reply_retweet_count"]) >= 1):
                        reward = torch.ones(1, dtype=torch.long)
                        reward2 = torch.from_numpy(np.array([0, 1]))
                        label = 1
                    else:
                        reward = torch.zeros(1, dtype=torch.long)
                        reward2 = torch.from_numpy(np.array([1, 0]))
                        label = 0
                    count += 1
                    if predict == label:
                        correct += 1
                    # loss = criterion(output, reward)
                    loss = criterion2(output, reward2.float())
                    m.optimizer.zero_grad()  # clear gradients from the previous step
                    loss.backward()
                    m.optimizer.step()
            # print("epoch:{0},text:{1},time:{2:1f}".format(
            #     epoch + 1, i + 1, time.time() - begin_time))
        print("accuracy: {0}".format(correct / count))
        correct = 0
        count = 0
args.cuda = (not args.no_cuda) and torch.cuda.is_available()
del args.no_cuda
args.kernel_sizes = [int(k) for k in args.kernel_sizes.split(',')]
args.save_dir = os.path.join(args.save_dir,
                             datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))

print("\nParameters:")
for attr, value in sorted(args.__dict__.items()):
    print("\t{}={}".format(attr.upper(), value))

# model
# cnn = model.CNN_Text(args)
cnn = model.LSTM(args)
if args.snapshot is not None:
    print('\nLoading model from {}...'.format(args.snapshot))
    cnn.load_state_dict(torch.load(args.snapshot))

if args.cuda:
    torch.cuda.set_device(args.device)
    cnn = cnn.cuda()

# wandb.init(config=args)
# wandb.watch(cnn)

# train or predict
if args.predict is not None:
    label = train.predict(args.predict, cnn, text_field, label_field, args.cuda)
import parameters
import model
import plot

# Set random seeds for reproducibility
np.random.seed(0)
torch.manual_seed(0)

# Create world: 5x5 grid with actions [North, East, South, West] under a
# random policy, with 45 sensory experiences
grid = world.World('./graphs/5x5.json', 45)

# Initialise hyperparameters for the model
params = parameters.parameters(grid)

# Create an LSTM, to see whether it learns well
lstm = model.LSTM(params['n_x'] + params['n_actions'], 100, params['n_x'],
                  n_a=params['n_actions'])

# Create a set of training worlds, as many as there are batches
environments = [world.World('./graphs/5x5.json', 45)
                for batch in range(params['n_batches'])]

# Create walks on each world
walks = [env.generate_walks(params['walk_length'], params['n_walks'])
         for env in environments]

# Create batched walks: instead of keeping walks separated by environment,
# collect each step's components across environments
batches = [[[[], [], []] for l in range(params['walk_length'])]
           for w in range(params['n_walks'])]
for env in walks:
    for i_walk, walk in enumerate(env):
        for i_step, step in enumerate(walk):
            for i_comp, component in enumerate(step):
                # Append state, observation, action across environments
                batches[i_walk][i_step][i_comp].append(component)
    if args.cuda:
        data = data.cuda()
    return data

eval_batch_size = 10
train_data = batchify(corpus.train, args.batch_size)
val_data = batchify(corpus.valid, eval_batch_size)
test_data = batchify(corpus.test, eval_batch_size)

###############################################################################
# Build the model
###############################################################################

ntokens = len(corpus.dictionary)
if args.cell:
    model = model.LSTM(ntokens, args.emsize, nhidden, args.dropout, args.tied)
else:
    model = model.RNNModel(args.model, ntokens, args.emsize, nhidden[0],
                           args.nlayers, args.dropout, args.tied)
if args.cuda:
    model.cuda()

criterion = nn.CrossEntropyLoss()

###############################################################################
# Training code
###############################################################################

def clip_gradient(model, clip):
    """Computes a gradient clipping coefficient based on gradient norm."""
    totalnorm = 0
    for p in model.parameters():
        correct += pred.eq(target.data.view_as(pred)).cpu().sum()
    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

def run():
    train_loader, test_loader = load_data(train_data, train_label,
                                          test_data, test_label)
    for epoch in range(1, epochs):
        train(epoch, train_loader)
        evaluate(test_loader)

if __name__ == '__main__':
    batchsize = 128
    epochs = 10000
    train_data = np.array(pd.read_csv('data/train.csv', header=None))
    train_label = np.array(pd.read_csv('data/trainlabel.csv', header=None))
    test_data = np.array(pd.read_csv('data/test.csv', header=None))
    test_label = np.array(pd.read_csv('data/testlabel.csv', header=None))
    device = torch.device("cuda")
    net = model.LSTM().to(device)
    optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)
    criterion = nn.CrossEntropyLoss()
    run()
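# load_data() is referenced in run() but not defined in this snippet. A
# minimal sketch of one plausible implementation using TensorDataset and
# DataLoader; the dtypes and shuffling here are assumptions, not the original.
from torch.utils.data import TensorDataset, DataLoader

def load_data(train_data, train_label, test_data, test_label, batch_size=128):
    train_set = TensorDataset(
        torch.as_tensor(train_data, dtype=torch.float32),
        torch.as_tensor(train_label, dtype=torch.long).squeeze())
    test_set = TensorDataset(
        torch.as_tensor(test_data, dtype=torch.float32),
        torch.as_tensor(test_label, dtype=torch.long).squeeze())
    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)
    return train_loader, test_loader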
def load(): if(city=="montreal"): trained_model=model.LSTM(12,24,2) states=torch.load('montreal.pt') trained_model.load_state_dict(states) input_size =12 pm25_index = -1 #print(trained_model) #return trained_model if (city =="toronto"): trained_model=model.LSTM(13,24,2) states=torch.load('toronto.pt') trained_model.load_state_dict(states) input_size =13 pm25_index = -1 #return trained_model if (city =="ottawa"): trained_model=model.LSTM(13,24,2) states=torch.load('ottawa.pt') trained_model.load_state_dict(states) input_size =13 pm25_index = -1 return trained_model,input_size,pm25_index if (city =="vancouver"): trained_model=model.LSTM(16,24,2) states=torch.load('vancouver.pt') trained_model.load_state_dict(states) input_size =16 pm25_index = -3 return trained_model,input_size,pm25_index #return trained_model if (city =="hamilton"): trained_model=model.LSTM(13,24,2) states=torch.load('hamilton.pt') trained_model.load_state_dict(states) input_size =13 return trained_model,input_size,pm25_index if (city =="beijing"): trained_model=model.LSTM(1,24,2) states=torch.load('beijing.pt') trained_model.load_state_dict(states) input_size =1 pm25_index = -1 return trained_model,input_size,pm25_index #return trained_model if (city =="chengdu"): trained_model=model.LSTM(1,24,2) states=torch.load('chengdu.pt') trained_model.load_state_dict(states) input_size =1 pm25_index = -1 return trained_model,input_size,pm25_index if (city =="shanghai"): trained_model=model.LSTM(1,24,2) states=torch.load('shanghai.pt') trained_model.load_state_dict(states) input_size =1 pm25_index = -1; return trained_model,input_size,pm25_index if (city =="shenyang"): trained_model=model.LSTM(1,24,2) states=torch.load('shenyang.pt') trained_model.load_state_dict(states) input_size =1 pm25_index = -1 return trained_model,input_size,pm25_index
from flask import Flask, jsonify
from flask_cors import CORS  # assumed import for the CORS(app) call below
import torch
import numpy as np
import dill as pickle

import model
from segmentation import Segmentation
import re

app = Flask(__name__)
app.config['SECRET_KEY'] = 'the quick brown fox jumps over the lazy dog'
app.config['CORS_HEADERS'] = 'Content-Type'
cors = CORS(app)

session = []
TEXT = pickle.load(open('TEXT.pkl', 'rb'))
model = model.LSTM(2764, 300, 128, 1, 2, True, 0.5,
                   TEXT.vocab.stoi[TEXT.pad_token])
TOKENIZER = lambda x: x.split()
model.load_state_dict(
    torch.load('lstm.pt', map_location=lambda storage, loc: storage))

@app.route('/', methods=['GET'])
def index():
    return jsonify('API Service Base')

@app.route('/api/prediction', methods=['GET'])
def get_prediction():
    result = []
    try:
        model_result = session[0]
args.embed_num = len(text_field.vocab)
args.class_num = len(label_field.vocab) - 1
args.cuda = (not args.no_cuda) and torch.cuda.is_available()
del args.no_cuda
args.save_dir = os.path.join(
    args.save_dir, datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))

print("\nParameters:")
for attr, value in sorted(args.__dict__.items()):
    print("\t{}={}".format(attr.upper(), value))

# model
m_model = None
if args.snapshot is None:
    if args.which_model == 'lstm':
        m_model = model.LSTM(args, m_embedding)
    elif args.which_model == 'gru':
        m_model = model.GRU(args, m_embedding)
    elif args.which_model == 'rnn':
        m_model = model.RNN(args, m_embedding)
else:
    print('\nLoading model from [%s]...' % args.snapshot)
    try:
        m_model = torch.load(args.snapshot)
    except FileNotFoundError:
        print("Sorry, this snapshot doesn't exist.")
        exit()

if args.cuda:
    m_model = m_model.cuda()

# train or predict
        sort=False,  # don't sort test/validation data
        batch_size=BATCH_SIZE,
        device=device)
    return train_iterator, valid_iterator, test_iterator, text_field.vocab

# load data
print("\nLoading data...")
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
text_field = data.Field(tokenize='spacy', lower=True,
                        include_lengths=True, batch_first=True)
label_field = data.Field(sequential=False, use_vocab=False,
                         batch_first=True, dtype=torch.float)
docid_field = data.RawField()
train_iter, dev_iter, test_iter, text_voca = fall_data(docid_field, text_field,
                                                       label_field, device=-1,
                                                       repeat=False)

# train or eval
if args.test:
    best_model = model.LSTM().to(device)
    optimizer = optim.Adam(best_model.parameters(), lr=args.lr)
    t.load_checkpoint(destination_folder + '/model.pt', best_model, optimizer)
    t.evaluate(best_model, test_iter)
else:
    print('start training')
    wandb.init()
    model = model.LSTM(text_voca).to(device)
    wandb.watch(model)  # watch the instantiated model, not the module
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    eval_every = len(train_iter) // 2
    t.train(model=model,
            optimizer=optimizer,
            train_loader=train_iter,
            valid_loader=dev_iter,
            num_epochs=args.epochs,
            eval_every=eval_every,
            file_path=destination_folder,
            device=device)
    list of plotly fig data
    """
    num_epochs = 100
    lookback = 20

    data = modelPack.get_stock_history('ETH-USD')
    price = data[['Close']]

    # scale closing prices to [-1, 1]
    scaler = MinMaxScaler(feature_range=(-1, 1))
    vals = scaler.fit_transform(price['Close'].values.reshape(-1, 1))
    price2 = pd.DataFrame()
    price2['Close'] = vals.reshape(-1)

    # split on the scaled frame (the original passed the unscaled `price`,
    # leaving the scaler unused)
    x_train, y_train, x_test, y_test = modelPack.split_data(price2, lookback)

    model = modelPack.LSTM()
    criterion = torch.nn.MSELoss(reduction='mean')
    optimiser = torch.optim.Adam(model.parameters(), lr=0.01)

    hist = np.zeros(num_epochs)
    start_time = time.time()
    lstm = []

    for t in range(num_epochs):
        y_train_pred = model(x_train)
        loss = criterion(y_train_pred, y_train)
        # print("Epoch ", t, "MSE: ", loss.item())
        hist[t] = loss.item()
        optimiser.zero_grad()
        loss.backward()
        optimiser.step()
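    # Predictions come out in the scaler's [-1, 1] space; mapping them back to
    # prices for plotting uses the fitted scaler's inverse transform. A short
    # sketch, assuming y_train_pred and y_train have shape [N, 1]:
    y_pred_prices = scaler.inverse_transform(y_train_pred.detach().numpy())
    y_true_prices = scaler.inverse_transform(y_train.detach().numpy())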
    mod = M.Model(inp)
    mod.fcLayer(1)
    re_mod = True
    return mod.get_current_layer()

a = tf.placeholder(tf.float32, [None, 6, 2])
init_cell_state = tf.constant(0., shape=[1, 10])
a_split = tf.unstack(a, axis=1)
out_split = []
for i in range(len(a_split)):
    out, cell = M.LSTM(a_split[i],
                       init_cell_state if i == 0 else out,
                       init_cell_state if i == 0 else cell,
                       10, 'LSTM1', i != 0)
    out_decoded = mod(out)
    out_split.append(out_decoded)
out = tf.stack(out_split, 1)  # should be in shape [None, 6, 1]

label_holder = tf.placeholder(tf.float32, [None, 6, 1])
mask_holder = tf.placeholder(tf.float32, [None, 6, 1])
# loss = tf.reduce_mean(tf.square(label_holder - out))
loss = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(logits=out, labels=label_holder))
train_op = tf.train.AdamOptimizer(0.01).minimize(loss)
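# A hedged sketch of driving the graph above (TF1 session style, implied by
# the tf.placeholder usage); the random arrays are stand-ins for real
# sequences and 0/1 labels.
import numpy as np

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    batch_x = np.random.rand(32, 6, 2).astype(np.float32)              # inputs
    batch_y = np.random.randint(0, 2, (32, 6, 1)).astype(np.float32)   # labels
    _, batch_loss = sess.run([train_op, loss],
                             feed_dict={a: batch_x, label_holder: batch_y})
    print('loss:', batch_loss)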
        start_epoch = int(tokens[-2]) + 1
        start_step = 1
        # num = re.split('_|\.', filename)[-2]
        # start_epoch = int(num) + 1
        print("Model restored from file " + filename)
    else:
        print("No file found for model " + args.model
              + ". A new one will be created.")
        args.restart = False

# instantiate a new neural network
if not args.restart:
    if args.model == 'RNN':
        rnn = model.RNN(data.n_letters, args.n_hidden, data.n_categories,
                        cuda=args.cuda)
    elif args.model == 'LSTM':
        rnn = model.LSTM(input_size=data.n_letters,
                         hidden_size=args.n_hidden,
                         output_size=data.n_categories,
                         cuda=args.cuda)
    elif args.model == 'GRU':
        rnn = model.GRU(input_size=data.n_letters,
                        hidden_size=args.n_hidden,
                        output_size=data.n_categories,
                        cuda=args.cuda)
assert rnn

# optimizer = torch.optim.SGD(rnn.parameters(), lr=args.lr)
optimizer = torch.optim.Adam(rnn.parameters(), lr=args.lr)
criterion = nn.NLLLoss()
if args.cuda:
    rnn.cuda()
    criterion.cuda()

start = time.time()
num_batches = data.n_instances // args.batch_size  # integer batch count