def train(self, proportion=0.25):
    """Train on a fraction of one epoch (60000 samples per full epoch)."""
    logger.info('Train model...')
    num_iter = int(60000 * proportion // self.num_batch) + 1
    logger.info(f'Training for {num_iter} iterations ({proportion} of one epoch)')
    for i in range(num_iter):
        batch = get_batch(train=True, batch_size=self.num_batch)
        feed_dict = {self.model.images: batch['x'],
                     self.model.labels: batch['y'],
                     self.model.is_training: True}
        try:
            _, summary, l, acc, lrn_rate = self.sess.run(
                [self.model.train_op, self.summary, self.model.cost,
                 self.model.acc, self.model.lrn_rate],
                feed_dict=feed_dict)
        except KeyboardInterrupt:
            self.close()
            sys.exit()
        except tf.errors.InvalidArgumentError:
            # skip malformed batches instead of aborting the run
            continue
        else:
            global_step = self.sess.run(self.model.global_step)
            self.train_writer.add_summary(summary, global_step)
            self.sess.run(self.model.increase_global_step)
            if i % 2 == 0:
                logger.debug(
                    f'Train step {i} | Loss: {l:.3f} | Accuracy: {acc:.3f} | '
                    f'Global step: {global_step} | Learning rate: {lrn_rate}')
def test(self, proportion=0.25):
    """Evaluate on a fraction of the test set; returns mean loss and accuracy."""
    logger.info('Evaluate model...')
    num_iter = int(1000 * proportion // self.num_batch) + 1
    logger.info(f'Evaluating for {num_iter} iterations')
    t_l, t_acc = 0, 0
    for i in range(num_iter):
        batch = get_batch(train=False, batch_size=self.num_batch)
        feed_dict = {self.model.images: batch['x'],
                     self.model.labels: batch['y'],
                     self.model.is_training: False}
        try:
            summary, l, acc = self.sess.run(
                [self.summary, self.model.cost, self.model.acc],
                feed_dict=feed_dict)
        except KeyboardInterrupt:
            self.close()
            sys.exit()
        except tf.errors.InvalidArgumentError:
            continue
        else:
            global_step = self.sess.run(self.model.global_step)
            self.test_writer.add_summary(summary, global_step)
            t_l += l
            t_acc += acc
            logger.debug(
                f'Test step {i} | Loss: {l:.3f} | Accuracy: {acc:.3f} | '
                f'Global step: {global_step}')
    return t_l / num_iter, t_acc / num_iter
def train_model_keras(songs_joined, model):
    for i in range(num_iters):
        x_train, y_train = get_batch(songs_joined,
                                     seq_length=seq_length,
                                     batch_size=batch_size)
        print(f"\nITERATION NUMBER {i}/{num_iters}\n")
        history = model.fit(x_train, y_train, batch_size=batch_size, epochs=1)
        if i == 0:
            # first iteration seeds the accumulator; later ones are merged into it
            accum_history = history
        else:
            merge_history(accum_history, history)
        if i % 100 == 0:
            # periodic checkpoint
            model.save(save_path)
    model.save(save_path)
    return accum_history
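`merge_history` is not defined in the snippet above; a minimal sketch of what such a helper might do, assuming it simply appends the per-epoch metrics of each new Keras `History` onto the accumulated one:

```python
def merge_history(accum_history, history):
    # Hypothetical helper: extend each metric list ('loss', 'accuracy', ...) in the
    # accumulated History with the values from the latest one-epoch fit.
    for key, values in history.history.items():
        accum_history.history.setdefault(key, []).extend(values)
```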
def train(itMax=100, szBatch=256, lr=0.01, vaFreq=10,
          pa_init=dft_pa_init, mo_create=dft_mo_create, data_load=dft_data_load):
    print 'loading data...'
    dataTr, dataVa, _ = data_load()

    print 'building graph...'
    # initialize parameters
    theta = pa_init()
    # fprop: the prediction model for MLP
    x, F = mo_create(theta, is_tr=True)
    # fprop: the loss
    y = T.ivector('y')
    ell = loss.create_logistic(F, y)
    # bprop
    dtheta = T.grad(ell, wrt=theta)

    # the graph for training
    ibat = T.lscalar('ibat')
    fg_tr = graph_mgr(
        inputs=[ibat],
        outputs=ell,
        updates=zip(theta, optim.update_gd(theta, dtheta)),
        givens={
            x: dataset.get_batch(ibat, dataTr[0], szBatch=szBatch),
            y: dataset.get_batch(ibat, dataTr[1], szBatch=szBatch)
        }
    )

    # the graph for validation
    ell_zo = loss.create_zeroone(F, y)
    fg_va = graph_mgr(
        inputs=[],
        outputs=ell_zo,
        givens={
            x: dataVa[0],
            y: dataVa[1]
        }
    )

    print 'Fire the graph...'
    trLoss, er_va = [], []
    N = dataTr[0].get_value(borrow=True).shape[0]
    # ceiling division: number of mini-batches per epoch
    numBatch = (N + szBatch - 1) / szBatch
    print '#batch = %d' % (numBatch,)
    for i in xrange(itMax):
        ibat = i % numBatch
        tmpLoss = fg_tr(ibat)
        print 'training: iteration %d, ibat = %d, loss = %6.5f' % (i, ibat, tmpLoss)
        trLoss.append(tmpLoss)
        if i % vaFreq == 0:
            tmp_er = fg_va()
            print 'validation: iteration %d, error rate = %6.5f' % (i, tmp_er)
            er_va.append(tmp_er)

    # plot training loss and validation error rate
    import matplotlib.pyplot as plt
    plt.subplot(1, 2, 1)
    plt.plot(range(1, len(trLoss) + 1), trLoss, 'ro-')
    plt.subplot(1, 2, 2)
    plt.plot([i * vaFreq for i in range(len(er_va))], er_va, 'bx-')
    plt.show(block=True)

    # return the parameters
    return theta
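For the `givens` substitution above to work, `dataset.get_batch` has to return a *symbolic* slice of a Theano shared dataset, indexed by the mini-batch number `ibat`. A minimal sketch under that assumption (the real helper is not shown in the snippet):

```python
def get_batch(ibat, data_shared, szBatch):
    # Hypothetical helper: symbolic slice of a shared variable selecting
    # mini-batch number `ibat`, suitable for a Theano `givens` substitution.
    return data_shared[ibat * szBatch:(ibat + 1) * szBatch]
```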
(train_X, train_Y), (test_X, test_Y) = \
    dataset.gen_co_model_data(data, TIME_STEPS, OUTPUT_SIZE, pick_feature)

# RNN definition block
model = Sequential()
model.add(LSTM(
    input_shape=(TIME_STEPS, INPUT_SIZE),
    output_dim=CELL_SIZE,
))
model.add(Dense(OUTPUT_SIZE))
model.compile(optimizer=RMSprop(LR), loss='mse')

batch_start = 0
for step in range(500):
    x, y = dataset.get_batch(train_X, train_Y, BATCH_SIZE, batch_start)
    cost = model.train_on_batch(x, y)
    # advance the batch window, wrapping around at the end of the training set
    batch_start = (batch_start + BATCH_SIZE) % len(train_X)
    if step % 10 == 0:
        print('train cost: ', cost)

model.fit(train_X, train_Y, nb_epoch=20, batch_size=30)
model.save('result/pred_next_5_co.model')

# evaluate on the held-out test set
pred_Y = model.predict(test_X)
gc.collect()
print("testing data mse: %f" % mean_squared_error(test_Y, pred_Y))
with open('result/pred_next_5_days_co.result', 'w') as f:
    json.dump({'pred': pred_Y.tolist(), 'eval': test_Y.tolist()}, f)
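The wrap-around fix above assumes `dataset.get_batch` slices a window of `BATCH_SIZE` consecutive samples starting at `batch_start`; a minimal sketch under that assumption (the actual helper may differ):

```python
import numpy as np

def get_batch(train_X, train_Y, batch_size, batch_start):
    # Hypothetical sliding-window batcher: take batch_size consecutive samples
    # starting at batch_start, wrapping around at the end of the arrays.
    idx = np.arange(batch_start, batch_start + batch_size) % len(train_X)
    return train_X[idx], train_Y[idx]
```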
def main(argv=None):
    if not tf.gfile.Exists(cfg.checkpoint_path):
        tf.gfile.MkDir(cfg.checkpoint_path)
    else:
        if not cfg.restore:
            tf.gfile.DeleteRecursively(cfg.checkpoint_path)
            tf.gfile.MkDir(cfg.checkpoint_path)

    input_images = tf.placeholder(tf.float32, shape=[None, None, None, 3], name='input_images')
    input_label_maps = tf.placeholder(tf.float32, shape=[None, None, None, 6], name='input_label_maps')
    input_training_masks = tf.placeholder(tf.float32, shape=[None, None, None, 1], name='input_training_masks')

    global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False)
    learning_rate = tf.train.exponential_decay(cfg.learning_rate, global_step,
                                               decay_steps=10000, decay_rate=0.94, staircase=True)
    # add summary
    tf.summary.scalar('learning_rate', learning_rate)
    opt = tf.train.AdamOptimizer(learning_rate)
    # opt = tf.train.MomentumOptimizer(learning_rate, 0.9)

    # split the inputs across GPUs
    input_images_split = tf.split(input_images, len(gpus))
    input_label_maps_split = tf.split(input_label_maps, len(gpus))
    input_training_masks_split = tf.split(input_training_masks, len(gpus))

    tower_grads = []
    reuse_variables = None
    for i, gpu_id in enumerate(gpus):
        with tf.device('/gpu:%d' % gpu_id):
            with tf.name_scope('model_%d' % gpu_id) as scope:
                total_loss, model_loss = tower_loss(input_images_split[i],
                                                    input_label_maps_split[i],
                                                    input_training_masks_split[i],
                                                    reuse_variables)
                batch_norm_updates_op = tf.group(*tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope))
                reuse_variables = True
                grads = opt.compute_gradients(total_loss)
                tower_grads.append(grads)

    grads = average_gradients(tower_grads)
    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

    summary_op = tf.summary.merge_all()
    # save moving average
    variable_averages = tf.train.ExponentialMovingAverage(cfg.moving_average_decay, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())
    # batch norm updates
    with tf.control_dependencies([variables_averages_op, apply_gradient_op, batch_norm_updates_op]):
        train_op = tf.no_op(name='train_op')

    saver = tf.train.Saver(tf.global_variables())
    summary_writer = tf.summary.FileWriter(cfg.checkpoint_path, tf.get_default_graph())

    init = tf.global_variables_initializer()
    # if cfg.pretrained_model_path is not None:
    #     variable_restore_op = slim.assign_from_checkpoint_fn(cfg.pretrained_model_path,
    #                                                          slim.get_trainable_variables(),
    #                                                          ignore_missing_vars=True)

    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        if cfg.restore:
            print('continue training from previous checkpoint')
            ckpt = tf.train.latest_checkpoint(cfg.checkpoint_path)
            saver.restore(sess, ckpt)
        else:
            sess.run(init)
            if cfg.pretrained_model_path is not None:
                variable_restore_op = slim.assign_from_checkpoint_fn(cfg.pretrained_model_path,
                                                                     slim.get_trainable_variables(),
                                                                     ignore_missing_vars=True)
                variable_restore_op(sess)

        data_generator = dataset.get_batch(train_data_path=cfg.train_data_path,
                                           num_workers=cfg.num_readers,
                                           input_size=cfg.input_size,
                                           batch_size=cfg.batch_size_per_gpu * len(gpus))

        TM_BEGIN('step_time')
        for step in range(cfg.max_steps):
            data = next(data_generator)
            ml, tl, _ = sess.run([model_loss, total_loss, train_op],
                                 feed_dict={input_images: data[0],
                                            input_label_maps: data[1],
                                            input_training_masks: data[2]})
            if np.isnan(tl):
                print('Loss diverged, stop training')
                break

            if step % 10 == 0:
                TM_END('step_time')
                TM_BEGIN('step_time')
                avg_time_per_step = TM_PICK('step_time') / 10.
                avg_examples_per_second = cfg.batch_size_per_gpu * len(gpus) / avg_time_per_step
                print('Step {:06d}, model loss {:.4f}, total loss {:.4f}, {:.2f} seconds/step, {:.2f} '
                      'examples/second'.format(step, ml, tl, avg_time_per_step, avg_examples_per_second))

            if step % cfg.save_checkpoint_steps == 0:
                saver.save(sess, cfg.checkpoint_path + 'model.ckpt', global_step=global_step)

            if step % cfg.save_summary_steps == 0:
                _, tl, summary_str = sess.run([train_op, total_loss, summary_op],
                                              feed_dict={input_images: data[0],
                                                         input_label_maps: data[1],
                                                         input_training_masks: data[2]})
                summary_writer.add_summary(summary_str, global_step=step)
from torch.cuda import amp  # pytorch version >= v1.6

scaler = amp.GradScaler()
save_threshold = args.save_interval
champ_val_loss = 200
epochs = args.epochs

# start train
for epoch in trange(start_epoch, epochs, desc="Epoch"):
    train_loss = 0.0
    text_dec.train()
    for iteration, samples in enumerate(
            tqdm(train_loader, desc="Train iteration")):
        # sort data
        src, tgt, length, img_feat = get_batch(samples)
        B = img_feat.shape[0]
        if args.gpu >= 0:
            src = src.to(device)
            tgt = tgt.to(device)
            length = length.to(device)
            img_feat = img_feat.to(device)

        # encode image
        hidden = text_dec.embed_img(img_feat)

        if not use_single_prec:
            # decode caption
            y, word_emb, text_emb = text_dec(src, length, hidden)  # y: (B, ntoken, L)
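The snippet above creates a `GradScaler` but is cut off before the scaler is used. For reference, the standard `torch.cuda.amp` pattern such a loop would typically follow; this is a generic sketch, not the continuation of the original code, and `optimizer` and `criterion` are assumed names:

```python
with amp.autocast():
    # forward pass under autocast: eligible ops run in float16
    y, word_emb, text_emb = text_dec(src, length, hidden)
    loss = criterion(y, tgt)

optimizer.zero_grad()
scaler.scale(loss).backward()   # scale the loss to avoid fp16 gradient underflow
scaler.step(optimizer)          # unscale grads and skip the step if any are inf/NaN
scaler.update()                 # adjust the loss scale for the next iteration
```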
import dataset
import numpy as np
import tensorflow as tf
import model
import os

checkpoint_prefix = 'checkpoints/'

graph = tf.get_default_graph()
with tf.Session(graph=graph) as sess:
    # restore the latest checkpoint and rebuild the graph from its meta file
    checkpoint_file = tf.train.latest_checkpoint(checkpoint_prefix)
    saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
    saver.restore(sess, checkpoint_file)

    x = graph.get_tensor_by_name("x:0")
    logits = graph.get_tensor_by_name("logits/BiasAdd:0")

    # run one batch through the restored graph and count correct predictions
    batch_x, batch_y = dataset.get_batch(size=64)
    logits_ = sess.run(logits, feed_dict={x: batch_x})
    count = 0
    for logit_, y_ in zip(logits_, batch_y):
        # print(np.argmax(logit_), y_)
        if np.argmax(logit_) == y_:
            count += 1
    print(count)