def model_fn(model_dir):
    """Load the trained RNN model and its corpus from ``model_dir``.

    The constructor arguments are read from ``model_info.pth`` and the
    weights from ``model.pth``. Both checkpoints are remapped onto the device
    selected here, so a checkpoint written on a GPU machine can still be
    loaded on a CPU-only host.

    Args:
        model_dir: Directory holding ``model_info.pth``, ``model.pth`` and
            whatever files ``data.Corpus`` reads.

    Returns:
        A dict with the model (in eval mode, on the selected device) under
        ``'model'`` and the loaded corpus under ``'corpus'``.
    """
    logger.info('Loading the model.')
    # Choose the device before deserializing so torch.load can remap any
    # CUDA-saved storages instead of failing when no GPU is present.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    with open(os.path.join(model_dir, 'model_info.pth'), 'rb') as f:
        model_info = torch.load(f, map_location=device)
    print('model_info: {}'.format(model_info))
    logger.info('Current device: {}'.format(device))
    model = RNNModel(rnn_type=model_info['rnn_type'], ntoken=model_info['ntoken'],
                     ninp=model_info['ninp'], nhid=model_info['nhid'], nlayers=model_info['nlayers'],
                     dropout=model_info['dropout'], tie_weights=model_info['tie_weights'])
    with open(os.path.join(model_dir, 'model.pth'), 'rb') as f:
        model.load_state_dict(torch.load(f, map_location=device))
        # after load the rnn params are not a continuous chunk of memory
        # this makes them a continuous chunk, and will speed up forward pass
        model.rnn.flatten_parameters()
    model.to(device).eval()
    logger.info('Loading the data.')
    corpus = data.Corpus(model_dir)
    logger.info('Done loading model and corpus. Corpus dictionary size: {}'.format(len(corpus.dictionary)))
    return {'model': model, 'corpus': corpus}
Exemple #2
0
    def __init__(self, vocab_size, embed_size, hidden_size, num_layers, num_classes, bidirectional=True,
                 dropout_rate=0.3):
        """Create the embedding, recurrent, classifier and dropout layers.

        Args:
            vocab_size: Number of rows in the embedding table.
            embed_size: Width of each embedding vector; also the first
                argument handed to ``RNNModel``.
            hidden_size: Hidden width handed to ``RNNModel``.
            num_layers: Layer count handed to ``RNNModel``.
            num_classes: Output width of the final linear layer.
            bidirectional: Forwarded to ``RNNModel``; when true the linear
                layer takes ``hidden_size * 2`` inputs instead of
                ``hidden_size``.
            dropout_rate: Probability passed to ``nn.Dropout``.
        """
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.embed = nn.Embedding(vocab_size, embed_size)
        self.rnn = RNNModel(embed_size, hidden_size, num_layers, batch_first=True, bidirectional=bidirectional)
        self.bidirectional = bidirectional

        # The classifier input is twice as wide when the recurrent layer is
        # bidirectional.
        classifier_inputs = hidden_size * 2 if bidirectional else hidden_size
        self.fc = nn.Linear(classifier_inputs, num_classes)
        self.dropout = nn.Dropout(dropout_rate)

        self.init_weights()
Exemple #3
0
def main(_):
    """Run the restored model over the test sentences and write predictions.

    Vocabularies are read from ``FLAGS.vocab_dir``, the checkpoint from
    ``FLAGS.checkpoint_file`` and the sentences from
    ``FLAGS.test_data_file``. One line of space-separated tags per sentence
    goes to ``FLAGS.output_tag_file`` and one intent per sentence to
    ``FLAGS.output_intent_file``.

    Args:
        _: Unused positional argument.
    """
    vocab_dir = FLAGS.vocab_dir
    vocab, dictionary = load_vocabulary(os.path.join(vocab_dir, 'sentence_vocab'))
    tags_list, tags_dict = load_vocabulary(os.path.join(vocab_dir, 'tag_vocab'))
    intent_list, intent_dict = load_vocabulary(os.path.join(vocab_dir, 'intent_vocab'))
    all_sentence = prepare_test_data(FLAGS.test_data_file, dictionary)

    model = RNNModel(hidden_size=FLAGS.hidden_size,
                     embed_size=FLAGS.embedding_size,
                     source_vocab_size=len(vocab),
                     tag_vocab_size=len(tags_list),
                     intent_vocab_size=len(intent_list))

    tag_id_rows = []
    intent_ids = []
    with tf.Session(graph=model.graph) as sess:
        model.saver.restore(sess, FLAGS.checkpoint_file)

        # Each sentence is fed as a batch of one with dropout disabled.
        for sentence in all_sentence:
            feed = {
                model.input_x: [sentence],
                model.input_len: [len(sentence)],
                model.keep_prob: 1.0,
            }
            predict_tags, predict_intent = sess.run(
                [model.output_tag, model.output_intent], feed_dict=feed)
            tag_id_rows.append(predict_tags[0])
            intent_ids.append(predict_intent[0])

    # Every tag line starts with a literal 'O'.
    # NOTE(review): presumably this stands in for a leading token the model
    # does not tag -- confirm against prepare_test_data.
    tag_lines = []
    for tag_ids in tag_id_rows:
        names = ['O']
        names.extend(tags_list[i] for i in tag_ids)
        tag_lines.append(' '.join(names))
    intent_lines = [intent_list[i] for i in intent_ids]

    with open(FLAGS.output_tag_file, 'w') as f:
        f.write('\n'.join(tag_lines))
    with open(FLAGS.output_intent_file, 'w') as f:
        f.write('\n'.join(intent_lines))
Exemple #4
0
def predict(args):
    """Sample text from a trained character-level model and print it.

    The recurrent state is first advanced over every prime character except
    the last. Sampling then starts from the last prime character, and each
    sampled character is fed back in as the next input, so every generated
    character is conditioned on the text that actually precedes it. The
    printed result is the prime followed by ``args['num_chars']`` sampled
    characters.

    Args:
        args: Mapping with the keys ``'model_dir'`` (directory holding
            ``config.pkl`` and ``vocab.pkl``), ``'model'`` (checkpoint path
            given to ``Saver.restore``), ``'prime'`` (non-empty seed string)
            and ``'num_chars'`` (number of characters to sample).

    Raises:
        IOError: If the model directory or the checkpoint path is missing.
        ValueError: If ``args['prime']`` is empty.
    """
    if not os.path.exists(args['model_dir']):
        raise IOError("Model directory doesn't exist: %s" %(args['model_dir']))

    # NOTE(review): unpickling executes arbitrary code; only point this at
    # model directories you trust.
    with open(os.path.join(args['model_dir'], 'config.pkl'), 'rb') as f:
        config = cPickle.load(f)
    with open(os.path.join(args['model_dir'], 'vocab.pkl'), 'rb') as f:
        chars, vocab = cPickle.load(f)

    # Fail fast, before a graph and a session are built.
    if not os.path.exists(args['model']):
        raise IOError("Model file doesn't exist: %s" %(args['model']))

    prime = args['prime']
    if not prime:
        raise ValueError("'prime' must contain at least one character")

    # The model is stepped one character at a time.
    config.batch_size = 1
    config.seq_length = 1
    prediction_model = RNNModel(config=config)

    def make_feed(char, state):
        """Build the feed dict for a single-character step."""
        x = np.full((config.batch_size, config.seq_length), vocab[char], dtype=np.int32)
        return {prediction_model.input_data: x, prediction_model.initial_state: state}

    with tf.Session() as session:
        tf.initialize_all_variables().run()

        saver = tf.train.Saver(tf.all_variables())
        saver.restore(session, args['model'])

        state = session.run(prediction_model.cells.zero_state(1, tf.float32))

        # Warm the state up on the prime without sampling anything.
        for char in prime[:-1]:
            state = session.run(prediction_model.final_state, make_feed(char, state))

        sampled = []
        char = prime[-1]
        for _ in range(args['num_chars']):
            predictionSoftmax, state = session.run(
                [prediction_model._predictionSoftmax, prediction_model.final_state],
                make_feed(char, state))
            probs = predictionSoftmax[0]

            # The character just sampled becomes the next input.
            char = chars[pick(probs)]
            sampled.append(char)

        output = prime + ''.join(sampled)
        print('Prediction: %s \n' % (output))
        sys.stdout.flush()
Exemple #5
0
def model_fn(model_dir):
    """Load the trained RNN model and its corpus from ``model_dir``.

    The constructor arguments are read from ``model_info.pth`` and the
    weights from ``model.pth``. Both checkpoints are remapped onto the device
    selected here, so a checkpoint written on a GPU machine can still be
    loaded on a CPU-only host.

    Args:
        model_dir: Directory holding ``model_info.pth``, ``model.pth`` and
            whatever files ``data.Corpus`` reads.

    Returns:
        A dict with the model (in eval mode, on the selected device) under
        ``'model'`` and the loaded corpus under ``'corpus'``.
    """
    logger.info('Loading the model.')
    # Choose the device before deserializing so torch.load can remap any
    # CUDA-saved storages instead of failing when no GPU is present.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    with open(os.path.join(model_dir, 'model_info.pth'), 'rb') as f:
        model_info = torch.load(f, map_location=device)
    print('model_info: {}'.format(model_info))
    logger.info('Current device: {}'.format(device))
    model = RNNModel(rnn_type=model_info['rnn_type'], ntoken=model_info['ntoken'],
                     ninp=model_info['ninp'], nhid=model_info['nhid'], nlayers=model_info['nlayers'],
                     dropout=model_info['dropout'], tie_weights=model_info['tie_weights'])
    with open(os.path.join(model_dir, 'model.pth'), 'rb') as f:
        model.load_state_dict(torch.load(f, map_location=device))
        # after load the rnn params are not a continuous chunk of memory
        # this makes them a continuous chunk, and will speed up forward pass
        model.rnn.flatten_parameters()
    model.to(device).eval()
    logger.info('Loading the data.')
    corpus = data.Corpus(model_dir)
    logger.info('Done loading model and corpus. Corpus dictionary size: {}'.format(len(corpus.dictionary)))
    return {'model': model, 'corpus': corpus}
Exemple #6
0
    for X in inputs:
        X=tf.reshape(X,[-1,W_xh.shape[0]])
        Z = tf.sigmoid(tf.matmul(X, W_xz) + tf.matmul(H, W_hz) + b_z)
        R = tf.sigmoid(tf.matmul(X, W_xr) + tf.matmul(H, W_hr) + b_r)
        H_tilda = tf.tanh(tf.matmul(X, W_xh) + tf.matmul(R * H, W_hh) + b_h)
        H = Z * H + (1 - Z) * H_tilda
        Y = tf.matmul(H, W_hq) + b_q
        outputs.append(Y)
    return outputs, (H,)

# Train the model and generate lyrics.

# Settings shared by both training runs below.
num_epochs = 160
num_steps = 35
batch_size = 32
lr = 1e2
clipping_theta = 1e-2
pred_period = 40
pred_len = 50
prefixes = ['分开', '不分开']

# First run: train with the `gru` step function together with `get_params`
# and `init_gru_state`.
train_and_predict_rnn(
    gru, get_params, init_gru_state, num_hiddens, vocab_size,
    corpus_indices, idx_to_char, char_to_idx, False,
    num_epochs, num_steps, lr, clipping_theta,
    batch_size, pred_period, pred_len, prefixes)

# Second run: the same settings, with a Keras GRU layer wrapped in RNNModel.
# NOTE(review): lr is still 1e2 for this Keras run -- confirm that is intended.
gru_layer = keras.layers.GRU(
    num_hiddens,
    time_major=True,
    return_sequences=True,
    return_state=True)
model = RNNModel(gru_layer, vocab_size)
train_and_predict_rnn_keras(
    model, num_hiddens, vocab_size, corpus_indices, idx_to_char,
    char_to_idx, num_epochs, num_steps, lr, clipping_theta,
    batch_size, pred_period, pred_len, prefixes)



Exemple #7
0
# Sparse [nsenses, ntokens] tensor holding 1.0 at every (sssp1[k], sssp2[k])
# coordinate pair.
sou_sense_word = torch.sparse.FloatTensor(torch.LongTensor([sssp1, sssp2]),
                                          torch.FloatTensor([1.0] * sssp_tot),
                                          torch.Size([nsenses, ntokens]))
# The same entries with the two index rows swapped, i.e. the
# [ntokens, nsenses] transposed layout.
sou_sense_word_t = torch.sparse.FloatTensor(torch.LongTensor([sssp2, sssp1]),
                                            torch.FloatTensor([1.0] * sssp_tot),
                                            torch.Size([ntokens, nsenses]))
# Move all four sparse tensors to the GPU when CUDA was requested.
if args.cuda:
    sou_sememe_sense = sou_sememe_sense.cuda()
    sou_sememe_sense_t = sou_sememe_sense_t.cuda()
    sou_sense_word = sou_sense_word.cuda()
    sou_sense_word_t = sou_sense_word_t.cuda()
# Wrap each tensor with its transposed counterpart.
# NOTE(review): presumably hownet_utils.spmm builds a sparse matmul operator
# from the pair -- confirm in hownet_utils.
sou_sparsemm1 = hownet_utils.spmm(sou_sememe_sense, sou_sememe_sense_t)
sou_sparsemm2 = hownet_utils.spmm(sou_sense_word, sou_sense_word_t)


# Build the language model; the sememe and sense counts and the word index
# structure are passed as keyword arguments.
model = RNNModel(args.model, ntokens, args.emsize, args.nhid, args.nlayers, args.dropout, args.tied,
                 nsememes=nsememes, use_cuda=args.cuda, nsenses=nsenses, word_idx_s=word_idx_s)

# if args.cuda:
#     model.cuda()
#
# criterion = nn.CrossEntropyLoss()
# logsoftmax = nn.LogSoftmax()
#
# ###############################################################################
# # Training code
# ###############################################################################
#
#
# def repackage_hidden(h):
#     """Wraps hidden states in new Variables, to detach them from their history."""
#     if type(h) == Variable:
Exemple #8
0
    laughFiles = [
        "laughtracks/laughtrack{}.wav".format(i) for i in range(1, 8)
    ]
    rand = np.random.randint(0, len(laughFiles))
    return_code = subprocess.call(["afplay", laughFiles[rand]])


# set up google cloud credential
with open('service_account_key.json', 'r') as f:
    credential = f.read()

# initialize model
print("Setting up")
graph = tf.Graph()
with graph.as_default():
    model = RNNModel()
    init = tf.global_variables_initializer()

# Load the pickled vocabulary, frequency column index and regression model.
# Context managers close each file handle once its object has been read.
# NOTE(review): pickle.load can execute arbitrary code; these files must come
# from a trusted source.
with open(savedvocabularyfile, 'rb') as f:
    vocab = pickle.load(f)
with open(savedfreqcolidxfile, 'rb') as f:
    freq_col_idx = pickle.load(f)
with open(savedlogmodelfile, 'rb') as f:
    regr = pickle.load(f)


@app.route('/')
@app.route('/index')
def index():
    """Serve ``index.html`` for both the ``/`` and ``/index`` routes."""
    page = render_template('index.html')
    return page


@app.route('/record')
def record():
    print("Recording")
        I = tf.sigmoid(tf.matmul(X, W_xi) + tf.matmul(H, W_hi) + b_i)
        F = tf.sigmoid(tf.matmul(X, W_xf) + tf.matmul(H, W_hf) + b_f)
        O = tf.sigmoid(tf.matmul(X, W_xo) + tf.matmul(H, W_ho) + b_o)
        C_tilda = tf.tanh(tf.matmul(X, W_xc) + tf.matmul(H, W_hc) + b_c)
        C = F * C + I * C_tilda
        H = O * tf.tanh(C)
        Y = tf.matmul(H, W_hq) + b_q
        outputs.append(Y)
    return outputs, (H, C)


# Train the model and generate lyrics.

# Settings shared by both training runs below.
num_epochs = 160
num_steps = 35
batch_size = 32
lr = 1e2
clipping_theta = 1e-2
pred_period = 40
pred_len = 50
prefixes = ['分开', '不分开']

# First run: train with the `lstm` step function together with `get_params`
# and `init_lstm_state`.
train_and_predict_rnn(
    lstm, get_params, init_lstm_state, num_hiddens, vocab_size,
    corpus_indices, idx_to_char, char_to_idx, False,
    num_epochs, num_steps, lr, clipping_theta,
    batch_size, pred_period, pred_len, prefixes)

# Second run: a Keras LSTM layer wrapped in RNNModel. Note the adjusted
# learning rate for this run.
lr = 1e-2
lstm_layer = keras.layers.LSTM(
    num_hiddens, time_major=True, return_sequences=True, return_state=True)
model = RNNModel(lstm_layer, vocab_size)
train_and_predict_rnn_keras(
    model, num_hiddens, vocab_size, corpus_indices, idx_to_char,
    char_to_idx, num_epochs, num_steps, lr, clipping_theta,
    batch_size, pred_period, pred_len, prefixes)
Exemple #10
0
from data import StupidBotDataset

# Load the dataset and enumerate every sample index.
dataset = StupidBotDataset("../dataset/data.csv")
dataset_size = len(dataset)
dataset_indices = list(range(dataset_size))

batch_size = 3
# Shuffle dataset indices.
# NOTE(review): SubsetRandomSampler below already draws the indices in random
# order, so this shuffle looks redundant -- confirm before removing.
np.random.shuffle(dataset_indices)

# Every index is handed to the sampler, so the loader covers the whole dataset.
train_sampler = SubsetRandomSampler(dataset_indices)
train_loader = torch.utils.data.DataLoader(dataset,
                                           batch_size=batch_size,
                                           sampler=train_sampler)

# Both constructor arguments are the number of unique characters.
model = RNNModel(dataset.unique_characters_length,
                 dataset.unique_characters_length)
model.cuda()

# Define loss and optimizer functions.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# Training the network.
n_epochs = 1000
for epoch in range(1, n_epochs + 1):
    for batch_index, (x, y) in enumerate(train_loader):
        # Clear gradients left over from the previous batch.
        optimizer.zero_grad()

        # NOTE(review): the model was moved to CUDA above but x and y are
        # used as the loader yields them -- confirm the dataset returns CUDA
        # tensors.
        output, hidden = model(x)  # (24, 24), (1, 1, 32)
        # Targets are flattened to one dimension and cast to long for the loss.
        loss = criterion(output, y.view(-1).long())
        loss.backward()
 def model(self, batchSize, stepSize, lr=0.0001):
     """Return an ``rm.TimeModel`` built from this object and the given settings.

     ``batchSize``, ``stepSize`` and ``lr`` are forwarded unchanged.
     """
     time_model = rm.TimeModel(self, batchSize, stepSize, lr)
     return time_model
Exemple #12
0

# Input shape = (Batch Size, Sequence Length, One-Hot Encoding Size).
input_sequences = one_hot_encode(
    input_sequences, unique_characters_size, sequences_length, batch_size
)

# region Define the model.
# Move the inputs and the targets onto the GPU as tensors.
input_sequences = torch.from_numpy(input_sequences).cuda()
target_sequences = torch.Tensor(target_sequences).cuda()
print(input_sequences.shape)
print(target_sequences.shape)

# Input and output widths both equal the number of unique characters.
model = RNNModel(
    input_size=unique_characters_size,
    output_size=unique_characters_size,
    hidden_dim=12,
    n_layers=1,
)
model.cuda()

n_epochs = 100

# Define loss and optimizer functions.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# Training the network.
for epoch in range(1, n_epochs + 1):
    # Clears existing gradients from previous epoch.
    optimizer.zero_grad()
if __name__ == "__main__":
    print("\n")
    print(
        "Hi! I'm Laughbot! Talk to me and press the Enter key when you want me to decide whether you're funny."
    )
    print(
        "--------------------------------------------------------------------------"
    )

    # set up google cloud credential
    with open('./service_account_key.json', 'r') as f:
        credential = f.read()

    with tf.Graph().as_default():
        model = RNNModel()
        init = tf.global_variables_initializer()

        with tf.Session() as session:
            session.run(init)
            # Load pretrained model
            print("Loading in model")
            new_saver = tf.train.import_meta_graph('saved_models/model.meta',
                                                   clear_devices=True)
            new_saver.restore(session, 'saved_models/model')

            # main REPL loop
            response = raw_input("Press 's' to start: ")
            while response != 'q':
                print("press enter to stop recording")
                record_audio()
Exemple #14
0
    tf.logging.set_verbosity(tf.logging.ERROR)

    with tf.name_scope('tower_0') as scope:
        pass

    x_mixed = tf.placeholder(tf.float32,
                             shape=(None, None, audio.ROWS),
                             name='x_mixed')
    y_src1 = tf.placeholder(tf.float32,
                            shape=(None, None, audio.ROWS),
                            name='y_src1')
    y_src2 = tf.placeholder(tf.float32,
                            shape=(None, None, audio.ROWS),
                            name='y_src2')
    global_step = tf.Variable(0, trainable=False)
    net = RNNModel(x_mixed, y_src1, y_src2, params)

    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())
    config = tf.ConfigProto(log_device_placement=True)
    # config = tf.ConfigProto(device_count={'GPU': 0})
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(init_op)

    to_load = []
    for v in tf.trainable_variables():
        to_load.append(v)
    s = tf.train.Saver(to_load, max_to_keep=None)
    latest = my_get_latest(weights)
    from tensorflow.python import pywrap_tensorflow

print('Batchify dataset')
# Validation and test data use a fixed batch size; training data uses the
# batch size taken from args.
eval_batch_size = 10
train_data = batchify(corpus.train, args.batch_size)
val_data = batchify(corpus.valid, eval_batch_size)
test_data = batchify(corpus.test, eval_batch_size)

###############################################################################
# Build the model
###############################################################################

print('Build the model')
# The token count is the size of the corpus dictionary.
ntokens = len(corpus.dictionary)
rnn_type = 'LSTM'
model = RNNModel(rnn_type, ntokens, args.emsize, args.nhid, args.nlayers, args.dropout, args.tied).to(device)

criterion = nn.CrossEntropyLoss()

# Save the data into model dir to be used with the model later
# Only regular files directly inside data_dir are copied; subdirectories are
# skipped.
for file_name in os.listdir(args.data_dir):
    full_file_name = os.path.join(args.data_dir, file_name)
    if os.path.isfile(full_file_name):
        copy(full_file_name, args.model_dir)

# Save arguments used to create model for restoring the model later
with open(model_info_path, 'wb') as f:
    model_info = {
        'rnn_type': rnn_type,
        'ntoken': ntokens,
        'ninp': args.emsize,
 def multi_input_model(self, batchSize, stepSize, lr=0.0001):
     """Return an ``rm.MultiInputModel`` built from this object and the given settings.

     ``batchSize``, ``stepSize`` and ``lr`` are forwarded unchanged.
     """
     multi_input = rm.MultiInputModel(self, batchSize, stepSize, lr)
     return multi_input
 def state_model(self, batchSize, stepSize, lr=0.0001):
     """Return an ``rm.StateModel`` built from this object and the given settings.

     ``batchSize``, ``stepSize`` and ``lr`` are forwarded unchanged.
     """
     stateful = rm.StateModel(self, batchSize, stepSize, lr)
     return stateful

print('Batchify dataset')
# Validation and test data use a fixed batch size; training data uses the
# batch size taken from args.
eval_batch_size = 10
train_data = batchify(corpus.train, args.batch_size)
val_data = batchify(corpus.valid, eval_batch_size)
test_data = batchify(corpus.test, eval_batch_size)

###############################################################################
# Build the model
###############################################################################

print('Build the model')
# The token count is the size of the corpus dictionary.
ntokens = len(corpus.dictionary)
rnn_type = 'LSTM'
model = RNNModel(rnn_type, ntokens, args.emsize, args.nhid, args.nlayers,
                 args.dropout, args.tied).to(device)

criterion = nn.CrossEntropyLoss()

# Save the data into model dir to be used with the model later
# Only regular files directly inside data_dir are copied; subdirectories are
# skipped.
for file_name in os.listdir(args.data_dir):
    full_file_name = os.path.join(args.data_dir, file_name)
    if os.path.isfile(full_file_name):
        copy(full_file_name, args.model_dir)

# Save arguments used to create model for restoring the model later
with open(model_info_path, 'wb') as f:
    model_info = {
        'rnn_type': rnn_type,
        'ntoken': ntokens,
        'ninp': args.emsize,
 def runtime_model(self, batchSize):
     """Return an ``rm.RuntimeModel`` built from this object and ``batchSize``."""
     runtime = rm.RuntimeModel(self, batchSize)
     return runtime
Exemple #20
0
def train(args):
    """Train the character-level RNN and checkpoint it periodically.

    The configuration and the vocabulary are pickled into
    ``args['model_dir']`` before training starts. A checkpoint is written at
    every global step that is a multiple of 1000 and again at the very last
    step.

    Args:
        args: Mapping with the keys ``'data'`` (handed to ``DataReader``) and
            ``'model_dir'`` (output directory, created when missing).
    """
    config = ParameterConfig()

    data_reader = DataReader(args['data'], config.batch_size, config.seq_length)
    config.vocab_size = data_reader.vocab_size

    if not os.path.exists(args['model_dir']):
        os.makedirs(args['model_dir'])

    with open(os.path.join(args['model_dir'], 'config.pkl'), 'wb') as f:
        cPickle.dump(config, f)
    with open(os.path.join(args['model_dir'], 'vocab.pkl'), 'wb') as f:
        cPickle.dump((data_reader.tokens, data_reader.vocab), f)

    training_model = RNNModel(config=config)
    checkpoint_path = os.path.join(args['model_dir'], 'model.ckpt')

    # The context manager closes the session; no explicit close() is needed.
    with tf.Session() as session:
        tf.initialize_all_variables().run()
        saver = tf.train.Saver(tf.all_variables())

        # Run every training epoch.
        for epoch in range(config.total_max_epoch):
            current_state = session.run(training_model.initial_state)

            # The learning rate only starts decaying after config.max_epoch.
            learning_rate_decay = config.lr_decay ** max(epoch - config.max_epoch, 0.0)
            training_model.assign_learningRate(session, config.learning_rate * learning_rate_decay)

            total_cost = 0.0
            total_seq = 0

            data_reader.reset_batch_pointer()
            for batch in range(data_reader.num_batches):
                global_step = epoch * data_reader.num_batches + batch

                start = time.time()
                x, y = data_reader.next_batch()
                # The final state of one batch seeds the next batch.
                feed_dict = {training_model.input_data: x, training_model.targets: y,
                             training_model.initial_state: current_state}

                cost, current_state, _ = session.run(
                    [training_model.cost, training_model.final_state, training_model.train_op],
                    feed_dict)

                total_cost += cost
                total_seq += config.seq_length

                # Running perplexity over the current epoch.
                perplexity = np.exp(total_cost / total_seq)
                end = time.time()

                print("{}/{} (epoch {}), perplexity = {:.3f}, time/batch = {:.3f}" \
                    .format(global_step,
                            config.total_max_epoch * data_reader.num_batches,
                            epoch, perplexity, end - start))
                sys.stdout.flush()

                is_last_step = (epoch == config.total_max_epoch - 1
                                and batch == data_reader.num_batches - 1)
                if global_step % 1000 == 0 or is_last_step:
                    saver.save(session, checkpoint_path, global_step=global_step)
                    print("Model saved to {}".format(checkpoint_path))
                    sys.stdout.flush()
Exemple #21
0
def main(_):
    """Train the joint tag/intent model and checkpoint it after every epoch.

    Training data comes from ``FLAGS.train_data_file`` and is split into a
    training part and a dev part. The vocabularies and the checkpoints are
    written under a fresh ``runs/<timestamp>`` directory next to the script.
    The mean tag and intent losses of the last 20 steps are printed every 20
    steps, and tag/intent accuracy on the dev part is printed after each
    epoch.

    Args:
        _: Unused positional argument.
    """
    all_sentence, all_tags, all_intent, vocab, dictionary, tags_list, tags_dict, intent_list, intent_dict = prepare_train_data(
        FLAGS.train_data_file, FLAGS.vocab_size)
    train_data, dev_data = split_data(all_sentence, all_tags, all_intent)

    output_path = os.path.join(sys.path[0], 'runs', str(int(time.time())))
    checkpoint_dir = os.path.join(output_path, 'checkpoints')
    os.makedirs(checkpoint_dir, mode=0o755, exist_ok=True)

    save_vocabulary(os.path.join(output_path, 'sentence_vocab'), vocab)
    save_vocabulary(os.path.join(output_path, 'tag_vocab'), tags_list)
    save_vocabulary(os.path.join(output_path, 'intent_vocab'), intent_list)

    model = RNNModel(hidden_size=FLAGS.hidden_size,
                     embed_size=FLAGS.embedding_size,
                     source_vocab_size=len(vocab),
                     tag_vocab_size=len(tags_list),
                     intent_vocab_size=len(intent_list))

    # Number of steps between two loss reports.
    log_every = 20

    with tf.Session(graph=model.graph) as sess:
        sess.run(tf.initialize_all_variables())

        step = 1
        tag_loss_sum = 0
        intent_loss_sum = 0
        for epoch in range(FLAGS.num_epoch):
            batch_gen = batch_generator(*train_data)
            for sentence_batch, length_batch, tags_batch, intent_batch in batch_gen:
                _, tag_loss, intent_loss = sess.run(
                    [model.train_op, model.tag_loss, model.intent_loss],
                    feed_dict={
                        model.input_x: sentence_batch,
                        model.input_len: length_batch,
                        model.input_tag: tags_batch,
                        model.input_intent: intent_batch,
                        model.keep_prob: FLAGS.dropout_keep_prob
                    })
                tag_loss_sum += tag_loss
                intent_loss_sum += intent_loss
                if step % log_every == 0:
                    # Report the mean over the window, not the last batch's
                    # loss.
                    print('Step', step, 'Tag loss', tag_loss_sum / log_every,
                          'Intent loss', intent_loss_sum / log_every)
                    tag_loss_sum = 0
                    intent_loss_sum = 0
                step += 1

            # Evaluate on the dev part, one sentence at a time with dropout
            # disabled.
            correct_tag, total_tag = 0, 0
            correct_intent, total_intent = 0, 0
            for sentence, tags, intent in zip(*dev_data):
                predict_tags, predict_intent = sess.run(
                    [model.output_tag, model.output_intent],
                    feed_dict={
                        model.input_x: [sentence],
                        model.input_len: [len(sentence)],
                        model.keep_prob: 1.0
                    })
                for tag1, tag2 in zip(tags, predict_tags[0]):
                    if tag1 == tag2:
                        correct_tag += 1
                    total_tag += 1
                if intent == predict_intent[0]:
                    correct_intent += 1
                total_intent += 1
            # An empty dev part reports 0.0 instead of dividing by zero.
            tag_accuracy = correct_tag / total_tag if total_tag else 0.0
            intent_accuracy = correct_intent / total_intent if total_intent else 0.0
            print('[Validation]', 'tag acc =', tag_accuracy, ', intent acc =',
                  intent_accuracy, '\n')
            model.saver.save(
                sess,
                os.path.join(
                    checkpoint_dir,
                    '{}_{:.4f}_{:.4f}.ckpt'.format(epoch, tag_accuracy,
                                                   intent_accuracy)))
Exemple #22
0
# encoded = trn_tok
print("Data loaded")

vocab_size = len(char2int)
hs = 1150
n_fac = 400
sequence_len = 70
batch_size = 30
# Embedding and hidden widths are both `hs`; weight dropout is disabled
# (wdrop=0) and input/output weights are not tied.
net = RNNModel(rnn_type="LSTM",
               ntoken=vocab_size,
               ninp=hs,
               nhid=hs,
               nlayers=3,
               dropout=0.25,
               dropouth=0.1,
               dropouti=0.2,
               dropoute=0.02,
               wdrop=0,
               tie_weights=False,
               device="cuda:0")
# Move the network to its device, retrying once if the first attempt fails.
# Only ordinary exceptions trigger the retry, so KeyboardInterrupt and
# SystemExit are no longer swallowed.
# NOTE(review): the retry repeats the identical call -- presumably a
# workaround for a flaky first CUDA call; confirm it is still needed.
try:
    net.to(net.device)
except Exception:
    net.to(net.device)

# optimizer = optim.Adam(net.parameters(), lr=30, weight_decay=0.0001 )
optimizer = torch.optim.SGD(net.parameters(),
                            lr=1e3,
                            momentum=0.90,
                            weight_decay=1.2e-6,