def model_fn(model_dir):
    """Load the trained RNN language model and its corpus from ``model_dir``.

    Reads the constructor arguments stored in ``model_info.pth``, rebuilds the
    ``RNNModel`` from them, restores the weights from ``model.pth`` and builds
    the corpus from the same directory.

    Args:
        model_dir: Directory containing ``model_info.pth`` and ``model.pth``;
            it is also passed to ``data.Corpus``.

    Returns:
        dict: ``{'model': model, 'corpus': corpus}`` where ``model`` has been
        moved to the selected device and switched to eval mode.
    """
    logger.info('Loading the model.')
    model_info = {}
    with open(os.path.join(model_dir, 'model_info.pth'), 'rb') as f:
        model_info = torch.load(f)
    print('model_info: {}'.format(model_info))

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    logger.info('Current device: {}'.format(device))

    model = RNNModel(rnn_type=model_info['rnn_type'],
                     ntoken=model_info['ntoken'],
                     ninp=model_info['ninp'],
                     nhid=model_info['nhid'],
                     nlayers=model_info['nlayers'],
                     dropout=model_info['dropout'],
                     tie_weights=model_info['tie_weights'])

    with open(os.path.join(model_dir, 'model.pth'), 'rb') as f:
        # map_location remaps the stored tensors onto the device chosen above,
        # so a checkpoint written on a GPU machine still loads on a CPU-only
        # host instead of failing while deserializing CUDA storages.
        model.load_state_dict(torch.load(f, map_location=device))

    # after load the rnn params are not a continuous chunk of memory
    # this makes them a continuous chunk, and will speed up forward pass
    model.rnn.flatten_parameters()
    model.to(device).eval()

    logger.info('Loading the data.')
    corpus = data.Corpus(model_dir)
    logger.info('Done loading model and corpus. Corpus dictionary size: {}'.format(len(corpus.dictionary)))
    return {'model': model, 'corpus': corpus}
def __init__(self, vocab_size, embed_size, hidden_size, num_layers, num_classes,
             bidirectional=True, dropout_rate=0.3):
    """Build the embedding, recurrent, classifier and dropout layers.

    Args:
        vocab_size: Number of rows of the embedding table.
        embed_size: Embedding width; also the first argument of the
            recurrent layer.
        hidden_size: Second argument of the recurrent layer; determines the
            input width of the final linear layer.
        num_layers: Third argument of the recurrent layer.
        num_classes: Output width of the final linear layer.
        bidirectional: Forwarded to the recurrent layer. When true the
            linear layer takes ``hidden_size * 2`` inputs.
        dropout_rate: Probability handed to ``nn.Dropout``.
    """
    super(RNN, self).__init__()

    self.hidden_size = hidden_size
    self.num_layers = num_layers

    self.embed = nn.Embedding(vocab_size, embed_size)
    # RNNModel is used here in place of the earlier nn.RNN call, with the
    # same arguments.
    self.rnn = RNNModel(embed_size, hidden_size, num_layers,
                        batch_first=True, bidirectional=bidirectional)
    self.bidirectional = bidirectional

    # The classifier input is twice as wide when the layer is bidirectional.
    fc_in_features = hidden_size * 2 if bidirectional else hidden_size
    self.fc = nn.Linear(fc_in_features, num_classes)

    self.dropout = nn.Dropout(dropout_rate)
    self.init_weights()
def main(_):
    """Predict tags and intents for the test sentences and write them out.

    Loads the three vocabularies from ``FLAGS.vocab_dir``, restores the model
    from ``FLAGS.checkpoint_file``, runs every prepared test sentence through
    it one at a time, and writes the tag sequences to
    ``FLAGS.output_tag_file`` and the intents to ``FLAGS.output_intent_file``
    (one sentence per line in each file).

    Args:
        _: Unused positional argument.
    """
    vocab_dir = FLAGS.vocab_dir
    vocab, dictionary = load_vocabulary(os.path.join(vocab_dir, 'sentence_vocab'))
    tags_list, _tags_dict = load_vocabulary(os.path.join(vocab_dir, 'tag_vocab'))
    intent_list, _intent_dict = load_vocabulary(os.path.join(vocab_dir, 'intent_vocab'))

    sentences = prepare_test_data(FLAGS.test_data_file, dictionary)

    model = RNNModel(hidden_size=FLAGS.hidden_size,
                     embed_size=FLAGS.embedding_size,
                     source_vocab_size=len(vocab),
                     tag_vocab_size=len(tags_list),
                     intent_vocab_size=len(intent_list))

    tag_id_rows = []
    intent_ids = []
    fetches = [model.output_tag, model.output_intent]
    with tf.Session(graph=model.graph) as session:
        model.saver.restore(session, FLAGS.checkpoint_file)
        for sentence in sentences:
            # Batch of one sentence; keep_prob 1.0 disables dropout.
            feed = {
                model.input_x: [sentence],
                model.input_len: [len(sentence)],
                model.keep_prob: 1.0,
            }
            batch_tags, batch_intent = session.run(fetches, feed_dict=feed)
            tag_id_rows.append(batch_tags[0])
            intent_ids.append(batch_intent[0])

    # Map ids back to labels; every tag row is prefixed with a literal 'O'.
    tag_rows = []
    for row in tag_id_rows:
        tag_rows.append(['O'] + [tags_list[i] for i in row])
    intents = [intent_list[i] for i in intent_ids]

    with open(FLAGS.output_tag_file, 'w') as f:
        f.write('\n'.join([' '.join(row) for row in tag_rows]))
    with open(FLAGS.output_intent_file, 'w') as f:
        f.write('\n'.join(intents))
def predict(args):
    """Generate text from a trained model and print it.

    Starts from ``args['prime']`` and appends ``args['num_chars']`` sampled
    characters. At step ``i`` the character ``output[i]`` is fed to the model
    and the character chosen by ``pick`` from the returned probabilities is
    appended to the output.

    Args:
        args: Mapping with the keys
            ``'model_dir'`` (directory holding ``config.pkl`` and
            ``vocab.pkl``), ``'model'`` (checkpoint path to restore),
            ``'prime'`` (seed text; must be non-empty and consist of
            characters present in the saved vocabulary) and
            ``'num_chars'`` (number of characters to generate).

    Raises:
        IOError: If the model directory or the checkpoint path does not exist.
    """
    if not os.path.exists(args['model_dir']):
        raise IOError("Model directory doesn't exist: %s" %(args['model_dir']))
    # Validate the checkpoint path up front so a bad path fails before the
    # graph and the session are built.
    if not os.path.exists(args['model']):
        raise IOError("Model file doesn't exist: %s" %(args['model']))

    # NOTE(security): unpickling executes arbitrary code; only point
    # 'model_dir' at directories produced by a trusted training run.
    with open(os.path.join(args['model_dir'], 'config.pkl'), 'rb') as f:
        config = cPickle.load(f)
    with open(os.path.join(args['model_dir'], 'vocab.pkl'), 'rb') as f:
        chars, vocab = cPickle.load(f)

    # Sampling consumes one character at a time.
    config.batch_size = 1
    config.seq_length = 1
    prediction_model = RNNModel(config=config)

    with tf.Session() as session:
        tf.initialize_all_variables().run()
        saver = tf.train.Saver(tf.all_variables())
        saver.restore(session, args['model'])

        state = session.run(prediction_model.cells.zero_state(1, tf.float32))
        output = args['prime']
        for i in range(args['num_chars']):
            # output grows by one character per iteration, so index i is
            # always valid as long as the prime text is non-empty.
            char = output[i]
            x = np.full((config.batch_size, config.seq_length), vocab[char], dtype=np.int32)
            feed = {prediction_model.input_data: x,
                    prediction_model.initial_state: state}
            predictionSoftmax, state = session.run(
                [prediction_model._predictionSoftmax, prediction_model.final_state],
                feed)
            probs = predictionSoftmax[0]
            output += chars[pick(probs)]

        print('Prediction: %s \n' % (output))
        sys.stdout.flush()
for X in inputs: X=tf.reshape(X,[-1,W_xh.shape[0]]) Z = tf.sigmoid(tf.matmul(X, W_xz) + tf.matmul(H, W_hz) + b_z) R = tf.sigmoid(tf.matmul(X, W_xr) + tf.matmul(H, W_hr) + b_r) H_tilda = tf.tanh(tf.matmul(X, W_xh) + tf.matmul(R * H, W_hh) + b_h) H = Z * H + (1 - Z) * H_tilda Y = tf.matmul(H, W_hq) + b_q outputs.append(Y) return outputs, (H,) # 训练模型并创作歌词 num_epochs, num_steps, batch_size, lr, clipping_theta = 160, 35, 32, 1e2, 1e-2 pred_period, pred_len, prefixes = 40, 50, ['分开', '不分开'] train_and_predict_rnn(gru, get_params, init_gru_state, num_hiddens, vocab_size, corpus_indices, idx_to_char, char_to_idx, False, num_epochs, num_steps, lr, clipping_theta, batch_size, pred_period, pred_len, prefixes) gru_layer = keras.layers.GRU(num_hiddens,time_major=True,return_sequences=True,return_state=True) model = RNNModel(gru_layer, vocab_size) train_and_predict_rnn_keras(model, num_hiddens, vocab_size, corpus_indices, idx_to_char, char_to_idx, num_epochs, num_steps, lr, clipping_theta, batch_size, pred_period, pred_len, prefixes)
sou_sense_word = torch.sparse.FloatTensor(torch.LongTensor([sssp1, sssp2]), torch.FloatTensor([1.0] * sssp_tot), torch.Size([nsenses, ntokens])) sou_sense_word_t = torch.sparse.FloatTensor(torch.LongTensor([sssp2, sssp1]), torch.FloatTensor([1.0] * sssp_tot), torch.Size([ntokens, nsenses])) if args.cuda: sou_sememe_sense = sou_sememe_sense.cuda() sou_sememe_sense_t = sou_sememe_sense_t.cuda() sou_sense_word = sou_sense_word.cuda() sou_sense_word_t = sou_sense_word_t.cuda() sou_sparsemm1 = hownet_utils.spmm(sou_sememe_sense, sou_sememe_sense_t) sou_sparsemm2 = hownet_utils.spmm(sou_sense_word, sou_sense_word_t) model = RNNModel(args.model, ntokens, args.emsize, args.nhid, args.nlayers, args.dropout, args.tied, nsememes=nsememes, use_cuda=args.cuda, nsenses=nsenses, word_idx_s=word_idx_s) # if args.cuda: # model.cuda() # # criterion = nn.CrossEntropyLoss() # logsoftmax = nn.LogSoftmax() # # ############################################################################### # # Training code # ############################################################################### # # # def repackage_hidden(h): # """Wraps hidden states in new Variables, to detach them from their history.""" # if type(h) == Variable:
laughFiles = [ "laughtracks/laughtrack{}.wav".format(i) for i in range(1, 8) ] rand = np.random.randint(0, len(laughFiles)) return_code = subprocess.call(["afplay", laughFiles[rand]]) # set up google cloud credential with open('service_account_key.json', 'r') as f: credential = f.read() # initialize model print("Setting up") graph = tf.Graph() with graph.as_default(): model = RNNModel() init = tf.global_variables_initializer() vocab = pickle.load(open(savedvocabularyfile, 'rb')) freq_col_idx = pickle.load(open(savedfreqcolidxfile, 'rb')) regr = pickle.load(open(savedlogmodelfile, 'rb')) @app.route('/') @app.route('/index') def index(): return render_template('index.html') @app.route('/record') def record(): print("Recording")
I = tf.sigmoid(tf.matmul(X, W_xi) + tf.matmul(H, W_hi) + b_i) F = tf.sigmoid(tf.matmul(X, W_xf) + tf.matmul(H, W_hf) + b_f) O = tf.sigmoid(tf.matmul(X, W_xo) + tf.matmul(H, W_ho) + b_o) C_tilda = tf.tanh(tf.matmul(X, W_xc) + tf.matmul(H, W_hc) + b_c) C = F * C + I * C_tilda H = O * tf.tanh(C) Y = tf.matmul(H, W_hq) + b_q outputs.append(Y) return outputs, (H, C) # 训练模型并创作歌词 num_epochs, num_steps, batch_size, lr, clipping_theta = 160, 35, 32, 1e2, 1e-2 pred_period, pred_len, prefixes = 40, 50, ['分开', '不分开'] train_and_predict_rnn(lstm, get_params, init_lstm_state, num_hiddens, vocab_size, corpus_indices, idx_to_char, char_to_idx, False, num_epochs, num_steps, lr, clipping_theta, batch_size, pred_period, pred_len, prefixes) lr = 1e-2 # 注意调整学习率 lstm_layer = keras.layers.LSTM(num_hiddens, time_major=True, return_sequences=True, return_state=True) model = RNNModel(lstm_layer, vocab_size) train_and_predict_rnn_keras(model, num_hiddens, vocab_size, corpus_indices, idx_to_char, char_to_idx, num_epochs, num_steps, lr, clipping_theta, batch_size, pred_period, pred_len, prefixes)
# Training script: builds the dataset and loader, creates the model, then
# runs the optimization loop.
from data import StupidBotDataset
# Build the dataset from the CSV path below.
dataset = StupidBotDataset("../dataset/data.csv")
dataset_size = len(dataset)
dataset_indices = list(range(dataset_size))
batch_size = 3
# Shuffle dataset indices.
np.random.shuffle(dataset_indices)
# The sampler draws from the (already shuffled) full index list.
train_sampler = SubsetRandomSampler(dataset_indices)
train_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, sampler=train_sampler)
# Both constructor arguments are the dataset's unique character count.
model = RNNModel(dataset.unique_characters_length, dataset.unique_characters_length)
model.cuda()
# Define loss and optimizer functions.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
# Training the network.
n_epochs = 1000
for epoch in range(1, n_epochs + 1):
    for batch_index, (x, y) in enumerate(train_loader):
        # Reset gradients accumulated by the previous batch.
        optimizer.zero_grad()
        # NOTE(review): the model is on the GPU but x is used as yielded by
        # the loader; confirm the dataset returns CUDA tensors.
        output, hidden = model(x)  # (24, 24), (1, 1, 32)
        # Targets are flattened and cast to long before the loss call.
        loss = criterion(output, y.view(-1).long())
        loss.backward()
def model(self, batchSize, stepSize, lr=0.0001):
    """Create an ``rm.TimeModel`` built around this object.

    Args:
        batchSize: Forwarded unchanged as the second constructor argument.
        stepSize: Forwarded unchanged as the third constructor argument.
        lr: Forwarded unchanged as the fourth constructor argument
            (default 0.0001).

    Returns:
        The result of ``rm.TimeModel(self, batchSize, stepSize, lr)``.
    """
    time_model = rm.TimeModel(self, batchSize, stepSize, lr)
    return time_model
# Input shape = (Batch Size, Sequence Length, One-Hot Encoding Size). input_sequences = one_hot_encode( input_sequences, unique_characters_size, sequences_length, batch_size ) # region Define the model. input_sequences = torch.from_numpy(input_sequences).cuda() target_sequences = torch.Tensor(target_sequences).cuda() print(input_sequences.shape) print(target_sequences.shape) model = RNNModel( input_size=unique_characters_size, output_size=unique_characters_size, hidden_dim=12, n_layers=1, ) model.cuda() n_epochs = 100 # Define loss and optimizer functions. criterion = nn.CrossEntropyLoss() optimizer = torch.optim.Adam(model.parameters(), lr=0.01) # Training the network. for epoch in range(1, n_epochs + 1): # Clears existing gradients from previous epoch. optimizer.zero_grad()
if __name__ == "__main__": print("\n") print( "Hi! I'm Laughbot! Talk to me and press the Enter key when you want me to decide whether you're funny." ) print( "--------------------------------------------------------------------------" ) # set up google cloud credential with open('./service_account_key.json', 'r') as f: credential = f.read() with tf.Graph().as_default(): model = RNNModel() init = tf.global_variables_initializer() with tf.Session() as session: session.run(init) # Load pretrained model print("Loading in model") new_saver = tf.train.import_meta_graph('saved_models/model.meta', clear_devices=True) new_saver.restore(session, 'saved_models/model') # main REPL loop response = raw_input("Press 's' to start: ") while response != 'q': print("press enter to stop recording") record_audio()
# Graph and session setup: placeholders, network, session and a Saver
# limited to the trainable variables.
# Only log messages at ERROR level.
tf.logging.set_verbosity(tf.logging.ERROR)
# The name scope is opened and closed immediately; its body is empty.
with tf.name_scope('tower_0') as scope:
    pass
# Three float32 placeholders whose last dimension is audio.ROWS; the first
# two dimensions are left unspecified.
x_mixed = tf.placeholder(tf.float32, shape=(None, None, audio.ROWS), name='x_mixed')
y_src1 = tf.placeholder(tf.float32, shape=(None, None, audio.ROWS), name='y_src1')
y_src2 = tf.placeholder(tf.float32, shape=(None, None, audio.ROWS), name='y_src2')
global_step = tf.Variable(0, trainable=False)
net = RNNModel(x_mixed, y_src1, y_src2, params)
# One op that initializes both global and local variables.
init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
config = tf.ConfigProto(log_device_placement=True)
# config = tf.ConfigProto(device_count={'GPU': 0})
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)
sess.run(init_op)
# Collect the trainable variables so the Saver handles only those.
to_load = []
for v in tf.trainable_variables():
    to_load.append(v)
s = tf.train.Saver(to_load, max_to_keep=None)
# NOTE(review): presumably the newest checkpoint under `weights`; confirm
# against my_get_latest.
latest = my_get_latest(weights)
from tensorflow.python import pywrap_tensorflow
print('Batchify dataset') eval_batch_size = 10 train_data = batchify(corpus.train, args.batch_size) val_data = batchify(corpus.valid, eval_batch_size) test_data = batchify(corpus.test, eval_batch_size) ############################################################################### # Build the model ############################################################################### print('Build the model') ntokens = len(corpus.dictionary) rnn_type = 'LSTM' model = RNNModel(rnn_type, ntokens, args.emsize, args.nhid, args.nlayers, args.dropout, args.tied).to(device) criterion = nn.CrossEntropyLoss() # Save the data into model dir to be used with the model later for file_name in os.listdir(args.data_dir): full_file_name = os.path.join(args.data_dir, file_name) if os.path.isfile(full_file_name): copy(full_file_name, args.model_dir) # Save arguments used to create model for restoring the model later with open(model_info_path, 'wb') as f: model_info = { 'rnn_type': rnn_type, 'ntoken': ntokens, 'ninp': args.emsize,
def multi_input_model(self, batchSize, stepSize, lr=0.0001):
    """Create an ``rm.MultiInputModel`` built around this object.

    Args:
        batchSize: Forwarded unchanged as the second constructor argument.
        stepSize: Forwarded unchanged as the third constructor argument.
        lr: Forwarded unchanged as the fourth constructor argument
            (default 0.0001).

    Returns:
        The result of ``rm.MultiInputModel(self, batchSize, stepSize, lr)``.
    """
    multi_input = rm.MultiInputModel(self, batchSize, stepSize, lr)
    return multi_input
def state_model(self, batchSize, stepSize, lr=0.0001):
    """Create an ``rm.StateModel`` built around this object.

    Args:
        batchSize: Forwarded unchanged as the second constructor argument.
        stepSize: Forwarded unchanged as the third constructor argument.
        lr: Forwarded unchanged as the fourth constructor argument
            (default 0.0001).

    Returns:
        The result of ``rm.StateModel(self, batchSize, stepSize, lr)``.
    """
    stateful = rm.StateModel(self, batchSize, stepSize, lr)
    return stateful
def runtime_model(self, batchSize):
    """Create an ``rm.RuntimeModel`` built around this object.

    Args:
        batchSize: Forwarded unchanged as the second constructor argument.

    Returns:
        The result of ``rm.RuntimeModel(self, batchSize)``.
    """
    runtime = rm.RuntimeModel(self, batchSize)
    return runtime
def train(args):
    """Train the RNN model and periodically checkpoint it.

    Saves the configuration to ``config.pkl`` and the reader's
    ``(tokens, vocab)`` pair to ``vocab.pkl`` inside ``args['model_dir']``
    (created if missing), then trains for ``config.total_max_epoch`` epochs.
    The learning rate is ``config.learning_rate`` multiplied by
    ``config.lr_decay ** max(epoch - config.max_epoch, 0.0)``. Progress is
    printed after every batch. A checkpoint is written whenever the global
    step is a multiple of 1000 (including step 0) and after the final batch
    of the final epoch.

    Args:
        args: Mapping with the keys ``'data'`` (passed to ``DataReader``)
            and ``'model_dir'`` (output directory).
    """
    config = ParameterConfig()
    data_reader = DataReader(args['data'], config.batch_size, config.seq_length)
    config.vocab_size = data_reader.vocab_size

    if not os.path.exists(args['model_dir']):
        os.makedirs(args['model_dir'])
    with open(os.path.join(args['model_dir'], 'config.pkl'), 'wb') as f:
        cPickle.dump(config, f)
    with open(os.path.join(args['model_dir'], 'vocab.pkl'), 'wb') as f:
        cPickle.dump((data_reader.tokens, data_reader.vocab), f)

    training_model = RNNModel(config=config)
    checkpoint_path = os.path.join(args['model_dir'], 'model.ckpt')
    total_steps = config.total_max_epoch * data_reader.num_batches

    # The context manager closes the session on exit; no explicit close().
    with tf.Session() as session:
        tf.initialize_all_variables().run()
        saver = tf.train.Saver(tf.all_variables())

        # Run every training epoch.
        for epoch in range(config.total_max_epoch):
            current_state = session.run(training_model.initial_state)
            # No decay until the epoch index exceeds config.max_epoch.
            learning_rate_decay = config.lr_decay ** max(epoch - config.max_epoch, 0.0)
            training_model.assign_learningRate(session, config.learning_rate * learning_rate_decay)

            total_cost = 0.0
            total_seq = 0
            data_reader.reset_batch_pointer()
            for batch in range(data_reader.num_batches):
                start = time.time()
                x, y = data_reader.next_batch()
                # The final state of one batch seeds the next batch.
                feed_dict = {training_model.input_data: x,
                             training_model.targets: y,
                             training_model.initial_state: current_state}
                cost, current_state, _ = session.run(
                    [training_model.cost, training_model.final_state, training_model.train_op],
                    feed_dict)
                total_cost += cost
                total_seq += config.seq_length
                perplexity = np.exp(total_cost / total_seq)
                end = time.time()

                global_step = epoch * data_reader.num_batches + batch
                print("{}/{} (epoch {}), perplexity = {:.3f}, time/batch = {:.3f}"
                      .format(global_step, total_steps, epoch, perplexity, end - start))
                sys.stdout.flush()

                is_last_batch = (epoch == config.total_max_epoch - 1
                                 and batch == data_reader.num_batches - 1)
                if global_step % 1000 == 0 or is_last_batch:
                    saver.save(session, checkpoint_path, global_step=global_step)
                    print("Model saved to {}".format(checkpoint_path))
                    sys.stdout.flush()
def main(_):
    """Train the joint tag/intent model, validating and checkpointing per epoch.

    Prepares the training data, splits it into train and dev parts, and
    saves the three vocabularies under ``runs/<unix time>`` next to the
    script directory (``sys.path[0]``). For each of ``FLAGS.num_epoch``
    epochs it trains on all batches, printing the tag and intent losses
    averaged over each window of 20 steps. It then measures tag and intent
    accuracy on the dev data one sentence at a time and saves a checkpoint
    whose file name carries the epoch and both accuracies.

    Args:
        _: Unused positional argument.
    """
    (all_sentence, all_tags, all_intent, vocab, dictionary, tags_list,
     tags_dict, intent_list, intent_dict) = prepare_train_data(
        FLAGS.train_data_file, FLAGS.vocab_size)
    train_data, dev_data = split_data(all_sentence, all_tags, all_intent)

    output_path = os.path.join(sys.path[0], 'runs', str(int(time.time())))
    checkpoint_dir = os.path.join(output_path, 'checkpoints')
    os.makedirs(checkpoint_dir, mode=0o755, exist_ok=True)
    save_vocabulary(os.path.join(output_path, 'sentence_vocab'), vocab)
    save_vocabulary(os.path.join(output_path, 'tag_vocab'), tags_list)
    save_vocabulary(os.path.join(output_path, 'intent_vocab'), intent_list)

    model = RNNModel(hidden_size=FLAGS.hidden_size,
                     embed_size=FLAGS.embedding_size,
                     source_vocab_size=len(vocab),
                     tag_vocab_size=len(tags_list),
                     intent_vocab_size=len(intent_list))

    report_every = 20  # number of steps per averaged loss report
    with tf.Session(graph=model.graph) as sess:
        sess.run(tf.initialize_all_variables())
        step = 1
        avg_tag_loss = 0
        avg_intent_loss = 0
        for epoch in range(FLAGS.num_epoch):
            batch_gen = batch_generator(*train_data)
            for sentence_batch, length_batch, tags_batch, intent_batch in batch_gen:
                _, tag_loss, intent_loss = sess.run(
                    [model.train_op, model.tag_loss, model.intent_loss],
                    feed_dict={
                        model.input_x: sentence_batch,
                        model.input_len: length_batch,
                        model.input_tag: tags_batch,
                        model.input_intent: intent_batch,
                        model.keep_prob: FLAGS.dropout_keep_prob
                    })
                avg_tag_loss += tag_loss
                avg_intent_loss += intent_loss
                if step % report_every == 0:
                    avg_tag_loss /= report_every
                    avg_intent_loss /= report_every
                    # Report the window averages; previously the last
                    # batch's raw losses were printed and the averages
                    # were thrown away.
                    print('Step', step, 'Tag loss', avg_tag_loss,
                          'Intent loss', avg_intent_loss)
                    avg_tag_loss = 0
                    avg_intent_loss = 0
                step += 1

            # Validation: one sentence per run, keep_prob 1.0 disables dropout.
            correct_tag, total_tag = 0, 0
            correct_intent, total_intent = 0, 0
            for sentence, tags, intent in zip(*dev_data):
                predict_tags, predict_intent = sess.run(
                    [model.output_tag, model.output_intent],
                    feed_dict={
                        model.input_x: [sentence],
                        model.input_len: [len(sentence)],
                        model.keep_prob: 1.0
                    })
                for tag1, tag2 in zip(tags, predict_tags[0]):
                    if tag1 == tag2:
                        correct_tag += 1
                    total_tag += 1
                if intent == predict_intent[0]:
                    correct_intent += 1
                total_intent += 1

            # An empty dev split yields 0.0 instead of ZeroDivisionError.
            tag_accuracy = correct_tag / total_tag if total_tag else 0.0
            intent_accuracy = correct_intent / total_intent if total_intent else 0.0
            print('[Validation]', 'tag acc =', tag_accuracy,
                  ', intent acc =', intent_accuracy, '\n')
            model.saver.save(
                sess,
                os.path.join(
                    checkpoint_dir,
                    '{}_{:.4f}_{:.4f}.ckpt'.format(epoch, tag_accuracy, intent_accuracy)))
# encoded = trn_tok print("Data loaded") vocab_size = len(char2int) hs = 1150 n_fac = 400 sequence_len = 70 batch_size = 30 #0.25, 0.1, 0.2, 0.02, 0.15 # net = CharLSTM(sequence_len=sequence_len, vocab_size=vocab_size, hidden_dim=hs, batch_size=batch_size, n_fac=n_fac, device="cuda:0") net = RNNModel(rnn_type="LSTM", ntoken=vocab_size, ninp=hs, nhid=hs, nlayers=3, dropout=0.25, dropouth=0.1, dropouti=0.2, dropoute=0.02, wdrop=0, tie_weights=False, device="cuda:0") try: net.to(net.device) except: net.to(net.device) # optimizer = optim.Adam(net.parameters(), lr=30, weight_decay=0.0001 ) optimizer = torch.optim.SGD(net.parameters(), lr=1e3, momentum=0.90, weight_decay=1.2e-6,