def train_model(model, args, X_train, X_valid, y_train, y_valid):
    """ Train the model """
    # Saves the model after every epoch.
    # Arguments: quantity to monitor, verbosity i.e. logging mode (0 or 1),
    # save_best_only: if true, the latest best model according to the quantity monitored will not be overwritten.
    # mode: one of {auto, min, max}. If save_best_only=True, the decision to overwrite the current save file is
    # made based on either the maximization or the minimization of the monitored quantity. For val_acc
    # this should be max, for val_loss this should be min, etc. In auto mode, the direction is automatically
    # inferred from the name of the monitored quantity.
    checkpoint = ModelCheckpoint('model-{epoch:03d}.h5',
                                 monitor='val_loss',
                                 verbose=0,
                                 save_best_only=args.save_best_only,
                                 mode='auto')

    # Mean squared error:
    # calculate the difference between expected steering angle and actual steering angle,
    # square the difference,
    # add up all those differences for as many data points as we have,
    # and divide by the number of them.
    # That value is our mean squared error! This is what we want to minimize via
    # gradient descent.
    model.compile(loss='mean_squared_error', optimizer=Adam(lr=args.learning_rate))

    # Fits the model on data generated batch-by-batch by a Python generator.
    # The generator is run in parallel to the model, for efficiency.
    # For instance, this allows you to do real-time data augmentation on images on CPU in
    # parallel to training your model on GPU.
    # So we reshape our data into their appropriate batches and train our model simultaneously.
    model.fit_generator(batch_generator(args.data_dir, X_train, y_train, args.batch_size, True),
                        args.samples_per_epoch,
                        args.nb_epoch,
                        max_q_size=1,
                        validation_data=batch_generator(args.data_dir, X_valid, y_valid, args.batch_size, False),
                        nb_val_samples=len(X_valid),
                        callbacks=[checkpoint],
                        verbose=1)

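# To make the loss above concrete, here is a tiny worked example of the mean
# squared error described in the comments, computed directly with NumPy.
# The steering-angle values are made up for illustration only.
import numpy as np

predicted = np.array([0.10, -0.05, 0.30, 0.00])   # predicted steering angles
actual = np.array([0.12, -0.02, 0.25, 0.01])      # recorded steering angles

diff = predicted - actual            # difference for each data point
squared = diff ** 2                  # square each difference
mse = squared.sum() / len(squared)   # add them up and divide by their number
print(mse)                           # same as np.mean((predicted - actual) ** 2)
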
avg_train_acc = 0
avg_test_loss = 0
avg_test_acc = 0
prev_loss = 100

if path_to_model:
    training_model.load_weights(path_to_model)
    # TODO: Fix double digit epoch numbers
    # Conventionally take the number before the extension as the epoch to start from
    starting_epoch = int(path_to_model[-4])

print 'Built model, starting training.'
logging.info('Epochs \tAvg train loss \tAvg test loss \tAvg train acc \tAvg test acc')

for epoch in range((starting_epoch + 1), NUM_EPOCHS):
    # Reset the running averages at the start of every epoch
    avg_train_loss = avg_train_acc = 0
    avg_test_loss = avg_test_acc = 0

    t1 = time.time()
    training_model.reset_states()
    for i, (x, y) in enumerate(batch_generator(train_data, char_to_idx, BATCH_SIZE, SEQ_LEN, vocab_size)):
        loss, accuracy = training_model.train_on_batch(x, y)
        avg_train_loss += loss
        avg_train_acc += accuracy
    avg_train_loss /= (i + 1)
    avg_train_acc /= (i + 1)
    t2 = time.time()
    print "Epoch %i took %f minutes." % (epoch, ((t2 - t1) / 60))

    for i, (x, y) in enumerate(batch_generator(test_data, char_to_idx, BATCH_SIZE, SEQ_LEN, vocab_size)):
        loss, accuracy = training_model.test_on_batch(x, y)
        avg_test_loss += loss
        avg_test_acc += accuracy
    avg_test_loss /= (i + 1)
    avg_test_acc /= (i + 1)

    # average the per-task losses collected above and return the result
    losses.append(loss)
    loss = losses[0]
    for i in range(1, length):
        loss += losses[i]
    loss = loss / length
    return loss


nb_epochs = 10
batch_size = 8
nb_batches = 30

gen = batch_generator(batch_size, nb_batches)
model = JointMultiTaskModel()
adam = optim.Adam(model.parameters(), lr=learning_rate)

for epoch in range(nb_epochs):
    for batch in range(nb_batches):
        text, tags, chunks, sent = next(gen)

        out = model.forward(text)
        loss = model.loss(out, tags, chunks, sent)

        # backpropagate and update the parameters (standard Adam step)
        adam.zero_grad()
        loss.backward()
        adam.step()

        print("Epoch:", epoch, "Batch:", batch, "Loss:", loss.data[0])

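# Note: the running-sum average at the top of this snippet can be written more
# compactly. A minimal equivalent, assuming `losses` is a non-empty list of
# scalar losses:
#
#   loss = sum(losses) / len(losses)
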
nb_epoch = 100
samples_per_epoch = 1000
batch_size = 32
save_best_only = True
learning_rate = 1e-4

# This checkpoint saves the model whenever the validation loss reaches a new minimum
checkpoint = ModelCheckpoint('../traindata/model-{epoch:03d}.h5',
                             monitor='val_loss',
                             verbose=0,
                             save_best_only=save_best_only,
                             mode='auto')

# Use mean_squared_error as the loss function
model.compile(loss='mean_squared_error', optimizer=Adam(lr=learning_rate))

# Train the model
H = model.fit_generator(batch_generator(data_dir, X_train, y_train, batch_size, True),
                        steps_per_epoch=samples_per_epoch,
                        epochs=nb_epoch,
                        max_q_size=1,
                        validation_data=batch_generator(data_dir, X_valid, y_valid, batch_size, False),
                        nb_val_samples=len(X_valid),
                        callbacks=[checkpoint],
                        verbose=1)

print("Trained!")

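# The `batch_generator` called above is defined elsewhere in each project. The
# sketch below is a hypothetical, minimal stand-in that matches the call
# signature used here -- batch_generator(data_dir, X, y, batch_size, is_training)
# -- and yields (images, steering_angles) pairs forever, which is what
# fit_generator expects. The image shape and single-path indexing are assumptions;
# real implementations also apply augmentation when is_training is true.
import os
import numpy as np
from keras.preprocessing.image import load_img, img_to_array

IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANNELS = 66, 200, 3  # assumed input shape

def batch_generator(data_dir, image_paths, steering_angles, batch_size, is_training):
    images = np.empty([batch_size, IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANNELS])
    steers = np.empty(batch_size)
    while True:
        # pick a random subset of frames for this batch
        for k, index in enumerate(np.random.permutation(len(image_paths))[:batch_size]):
            img = load_img(os.path.join(data_dir, image_paths[index]),
                           target_size=(IMAGE_HEIGHT, IMAGE_WIDTH))
            images[k] = img_to_array(img) / 255.0
            steers[k] = steering_angles[index]
            # is_training would normally gate random augmentation (flips, shifts, ...)
        yield images, steers
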
import argparse
import os

import numpy as np
import torch

# load_bert, text_data_generator, batch_generator and get_repr_from_layer
# come from elsewhere in the project (not shown here).


def main():
    parser = argparse.ArgumentParser(__doc__)
    parser.add_argument("bert_model", type=str, help="Variant of pre-trained model.")
    parser.add_argument(
        "layer", type=int,
        help="Layer from which the representation is taken.")
    parser.add_argument("language_list", type=str, help="TSV file with available languages.")
    parser.add_argument("data", type=str, help="Directory with txt files.")
    parser.add_argument("target", type=str, help="npz file with saved centroids.")
    parser.add_argument("--num-threads", type=int, default=4)
    parser.add_argument(
        "--mean-pool", default=False, action="store_true",
        help="If set, use mean-pooling instead of the [CLS] vector.")
    parser.add_argument("--batch-size", type=int, default=32)
    parser.add_argument("--batch-count", type=int, default=200)
    args = parser.parse_args()

    torch.set_num_threads(args.num_threads)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    tokenizer, model = load_bert(args.bert_model, device)[:2]

    language_names = []
    centroids = []
    with open(args.language_list) as lng_f:
        for line in lng_f:
            name, code = line.strip().split("\t")
            data_file = os.path.join(args.data, f"{code}.txt")
            data = text_data_generator(data_file, tokenizer)
            batches = batch_generator(data, args.batch_size)
            print(f"Data iterator initialized: {data_file}")

            with torch.no_grad():
                representations = []
                for _, txt in zip(range(args.batch_count), batches):
                    batch_repr = get_repr_from_layer(
                        model, txt.to(device), args.layer,
                        mean_pool=args.mean_pool).cpu().numpy()
                    if not np.any(np.isnan(batch_repr)):
                        representations.append(batch_repr)

            if representations:
                language_names.append(name)
                centroid = np.concatenate(representations, axis=0).mean(0)
                centroids.append(centroid)

    print("Centroids computed.")
    np.savez(args.target, languages=language_names, centroids=centroids)

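# Hypothetical follow-up showing how the file written by np.savez above could be
# consumed later; the file name "centroids.npz" is illustrative, and the keys
# match the savez call.
import numpy as np

saved = np.load("centroids.npz")
languages = list(saved["languages"])   # one entry per language that was kept
centroids = saved["centroids"]         # array of shape (num_languages, hidden_size)
print(len(languages), centroids.shape)
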
x = x[r < 1]
y = one_hot((x[:, 0] > 0).astype("int"))
c = x[:, 1, None]

class_weights = [1., 1.]
print("Class weights:", class_weights)

ids = np.prod(x, axis=1) > 0
train = Data(x=x[ids], y=y[ids], c=c[ids])

ids = np.prod(x, axis=1) < 0
val = Data(x=x[ids], y=y[ids], c=c[ids])

train.batches = batch_generator(train.x, train.y, train.c, batch_size=100, infinite=True)
val.batches = batch_generator(val.x, val.y, val.c, batch_size=100, infinite=True)

print("Building model...")

drop_rate = 0.1
sigma = 0.002


class Model_invrep(invrep_supervised.Model):

    @staticmethod

options['lr_g'] = 0.001
options['lr_d'] = 0.001
options['reg_tgt'] = 1.0

description = utils.description(sources, targets)
description = description + '_DANN_' + str(options['reg_disc'])

tf.reset_default_graph()
graph = tf.get_default_graph()
model = MNISTModel_DANN(options)
sess = tf.Session(graph=graph, config=tf.ConfigProto(gpu_options=gpu_options))
tf.global_variables_initializer().run(session=sess)

record = []
gen_source_batch = utils.batch_generator(
    [source_train['images'], source_train['labels'], source_train['domains']],
    batch_size)
gen_target_batch = utils.batch_generator(
    [target_train['images'], target_train['labels'], target_train['domains']],
    batch_size)
gen_source_batch_valid = utils.batch_generator([
    np.concatenate([source_valid['images'], source_test['images']]),
    np.concatenate([source_valid['labels'], source_test['labels']]),
    np.concatenate([source_valid['domains'], source_test['domains']])
], batch_size)
gen_target_batch_valid = utils.batch_generator([
    np.concatenate([target_valid['images'], target_test['images']]),
    np.concatenate([target_valid['labels'], target_test['labels']]),
    np.concatenate([target_valid['domains'], target_test['domains']])
], batch_size)

# WGAN-style gradient penalty on the critic output
gradients = tf.gradients(critic_out, [h2_whole])[0]
slopes = tf.sqrt(tf.reduce_sum(tf.square(gradients), reduction_indices=[1]))
gradient_penalty = tf.reduce_mean((slopes - 1.) ** 2)

# Collect the variables belonging to the classifier, critic and generator
theta_C = [v for v in tf.global_variables() if 'classifier' in v.name]
theta_D = [v for v in tf.global_variables() if 'critic' in v.name]
theta_G = [v for v in tf.global_variables() if 'generator' in v.name]

# Critic update: maximize the Wasserstein distance estimate, penalized by the gradient term
wd_d_op = tf.train.AdamOptimizer(lr_wd_D).minimize(
    -wd_loss + gp_param * gradient_penalty, var_list=theta_D)

# Main update: classification loss + L2 regularization + Wasserstein distance term
all_variables = tf.trainable_variables()
l2_loss = l2_param * tf.add_n(
    [tf.nn.l2_loss(v) for v in all_variables if 'bias' not in v.name])
total_loss = clf_loss + l2_loss + wd_param * wd_loss
train_op = tf.train.AdamOptimizer(lr).minimize(total_loss, var_list=theta_G + theta_C)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    S_batches = utils.batch_generator([xs, ys], batch_size / 2)
    T_batches = utils.batch_generator([xt, yt], batch_size / 2)

    for i in range(num_steps):
        # Draw half a batch from the source domain and half from the target domain
        xs_batch, ys_batch = S_batches.next()
        xt_batch, yt_batch = T_batches.next()
        xb = np.vstack([xs_batch, xt_batch])
        yb = np.vstack([ys_batch, yt_batch])

        # Train the critic several times per generator/classifier step
        for _ in range(D_train_num):
            sess.run(wd_d_op, feed_dict={x: xb, train_flag: True})

        sess.run(train_op, feed_dict={x: xb, y_: yb, train_flag: True})

        if i % 200 == 0:
            # Evaluate on held-out source and target data
            acc, clf_ls = sess.run([clf_acc, clf_loss],
                                   feed_dict={x: xs_test, y_: ys_test, train_flag: False})
            acc_m, clf_ls_m = sess.run([clf_acc, clf_loss],
                                       feed_dict={x: xt_test, y_: yt_test, train_flag: False})

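# For reference, the penalty assembled above is mean((||gradient||_2 - 1)^2), the
# WGAN-GP regularizer. The NumPy lines below redo the same arithmetic on a small
# made-up gradient matrix (one row per sample), outside of TensorFlow.
import numpy as np

gradients = np.array([[0.6, 0.8],     # per-sample gradients, illustrative values
                      [3.0, 4.0]])
slopes = np.sqrt(np.sum(np.square(gradients), axis=1))   # per-sample L2 norms: [1.0, 5.0]
gradient_penalty = np.mean((slopes - 1.0) ** 2)          # ((1 - 1)^2 + (5 - 1)^2) / 2 = 8.0
print(gradient_penalty)
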
import argparse

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable

# Model, load_data and batch_generator come from elsewhere in the project (not shown here).


def main():
    parser = argparse.ArgumentParser(description='Behavioral Cloning Training Program')
    parser.add_argument('-d', help='data directory', dest='data_dir', type=str, default='./data/IMG')
    parser.add_argument('-t', help='test size fraction', dest='test_size', type=float, default=0.2)
    parser.add_argument('-k', help='drop out probability', dest='keep_prob', type=float, default=0.5)
    parser.add_argument('-n', help='number of epochs', dest='nb_epoch', type=int, default=10)
    parser.add_argument('-s', help='samples per epoch', dest='samples_per_epoch', type=int, default=20000)
    parser.add_argument('-b', help='batch size', dest='batch_size', type=int, default=40)
    # parser.add_argument('-o', help='save best models only', dest='save_best_only', type=s2b, default='true')
    parser.add_argument('-l', help='learning rate', dest='learning_rate', type=float, default=1.0e-4)
    parser.add_argument('-cuda', help='enables CUDA training', dest='cuda', type=bool, default=False)
    args = parser.parse_args()

    net = Model()
    if args.cuda:
        net.cuda()
    print(net)

    criterion = nn.MSELoss()
    optimizer = optim.Adam(net.parameters(), lr=0.0001)

    x_train, x_valid, x_test, y_train, y_valid, y_test = load_data(args)
    trainloader = batch_generator('./data/IMG', x_train, y_train, 32, is_training=True)
    testloader = batch_generator('./data/IMG', x_test, y_test, 32, is_training=False)

    for epoch in range(10):
        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            # get the inputs
            inputs, target = data
            inputs = np.transpose(inputs, [0, 3, 1, 2])
            inputs = torch.from_numpy(inputs).float()
            target = torch.from_numpy(target).float()
            if args.cuda:
                inputs, target = inputs.cuda(), target.cuda()

            # wrap them in Variable
            inputs, target = Variable(inputs), Variable(target)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, target)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.data[0]
            print(i)
            if i % 10 == 9:  # print every 10 mini-batches
                print('[%d, %5d] loss: %.5f' % (epoch + 1, i + 1, running_loss / 10))
                running_loss = 0.0

    print('Finished Training')

    running_loss = list()
    for i, data in enumerate(testloader, 0):
        # get the inputs
        inputs, target = data
        inputs = np.transpose(inputs, [0, 3, 1, 2])
        inputs = torch.from_numpy(inputs).float()
        target = torch.from_numpy(target).float()
        if args.cuda:
            inputs, target = inputs.cuda(), target.cuda()

        # wrap them in Variable
        inputs, target = Variable(inputs), Variable(target)

        # forward
        outputs = net(inputs)
        loss = criterion(outputs, target)

        # store loss
        running_loss.append(loss.data[0])

    print('Average test MSE loss : {}'.format(sum(running_loss) / len(running_loss)))