def main():
    """Parse CLI args, train a flower classifier, and optionally save a checkpoint."""
    input_args = train_input()
    print_model(input_args)

    # `gpu` is already a boolean flag; `== True` was redundant.
    device = torch.device(
        "cuda:0" if torch.cuda.is_available() and input_args.gpu else "cpu")

    model = create_model(input_args.arch, input_args.hidden_units)
    criterion = nn.NLLLoss()
    # Only the classifier head is optimized; the pretrained backbone stays frozen.
    optimizer = optim.Adam(model.classifier.parameters(), input_args.learning_rate)
    exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

    image_datasets, dataloaders = create_dataloaders(input_args.data_dir)
    train(model, dataloaders, image_datasets, criterion, optimizer,
          exp_lr_scheduler, device, input_args.epochs)

    if input_args.save_dir:
        # Move weights to CPU so the checkpoint loads on machines without a GPU.
        model.cpu()
        save_checkpoint({
            'epoch': input_args.epochs,
            'arch': input_args.arch,
            'classifier': model.classifier,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            # class -> index mapping, needed to decode predictions later.
            'mapping': image_datasets['train'].class_to_idx
        }, input_args.save_dir)
def train():
    """Train the seq2seq movement model (ported from Python 2 to Python 3).

    Loads pre-generated .npy train/test splits, trains for FLAGS.epoches
    epochs, validates every FLAGS.check_per_epoches epochs, checkpoints the
    best (lowest) cumulative test loss, and stops early once the test loss
    has not improved for 50 epochs.
    """
    # Load pre-generated arrays for this dataset / decoder size.
    data_fmt = "./data/%s/%s_%d_movement_%%s.npy" % (
        FLAGS.dataset, FLAGS.dataset, FLAGS.decoder_size)
    train_X = np.load(data_fmt % "train_X")
    train_y = np.load(data_fmt % "train_y")
    test_X = np.load(data_fmt % "test_X")
    test_y = np.load(data_fmt % "test_y")

    tolerance_count = 0
    checkpoint_dir = os.path.join(
        save_model_dir,
        "%dhidden_%ddecoder_bestmodel.ckpt" % (FLAGS.hidden_dim, FLAGS.decoder_size))

    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        global_test_error = 1000000
        model = create_model(sess, FLAGS.encoder_size, FLAGS.decoder_size,
                             FLAGS.hidden_dim, FLAGS.input_dim,
                             FLAGS.output_dim, load_model, checkpoint_dir)

        # Training loop.
        for epoch in range(FLAGS.epoches):
            st = time.time()
            epoch_loss = 0.0
            # Floor division: any partial final batch is dropped.
            total_step = train_X.shape[0] // FLAGS.batch_size
            for step in range(total_step):
                encode_inputs, decode_inputs = model.get_batch(
                    train_X, train_y, FLAGS.batch_size, step)
                step_loss, predict_outputs = model.step(
                    sess, encode_inputs, decode_inputs,
                    FLAGS.encoder_size, FLAGS.decoder_size, is_training=True)
                epoch_loss += step_loss
                if (step + 1) % 20 == 0:
                    print('train(step:%d/%d epoch:%d/%d)' % (
                        step + 1, total_step, epoch + 1, FLAGS.epoches), '\t',
                        predict_outputs[0][0], '\t', decode_inputs[0][0],
                        '\t loss:', step_loss)

            print("train loss %.6f in epoch=%d, time=%f" % (
                epoch_loss, epoch + 1, time.time() - st))

            # Periodic validation on the held-out split.
            if (epoch + 1) % FLAGS.check_per_epoches == 0:
                print(" validation (epoch:%d/%d)" % (epoch + 1, FLAGS.epoches))
                test_loss = 0.0
                for step_test in range(len(test_X) // FLAGS.batch_size):
                    encode_inputs, decode_inputs = model.get_batch(
                        test_X, test_y, FLAGS.batch_size, step_test)
                    step_loss, predict_outputs = model.step(
                        sess, encode_inputs, decode_inputs,
                        FLAGS.encoder_size, FLAGS.decoder_size, is_training=False)
                    test_loss += step_loss

                # Keep only the checkpoint with the lowest cumulative test loss.
                if test_loss < global_test_error:
                    tolerance_count = 0
                    global_test_error = test_loss
                    model.saver.save(sess, checkpoint_dir)
                else:
                    tolerance_count += FLAGS.check_per_epoches

                print("test loss %.6f in epoch=%d" % (test_loss, epoch + 1))
                print("global min test loss %.6f in epoch=%d" % (
                    global_test_error, epoch + 1))
                # Early stop after 50 epochs without improvement.
                if tolerance_count >= 50:
                    break

        print('The final final final global min test error: %f' % global_test_error)
def _construct_and_fill_model(self):
    """Build the ResNet classifier and strictly load its weights for inference."""
    super()._construct_and_fill_model()

    device_ids = sly.env.remap_gpu_devices([self._config[GPU_DEVICE]])
    num_layers = determine_resnet_model_configuration(TaskPaths.MODEL_CONFIG_PATH)
    # Output width of the head: highest tag index + 1.
    n_cls = max(self.classification_tags_to_idx.values()) + 1

    self.model = create_model(num_layers=num_layers,
                              n_cls=n_cls,
                              device_ids=device_ids)
    self.model = WeightsRW(TaskPaths.MODEL_DIR).load_strictly(self.model)
    self.model.eval()
    logger.info('Weights are loaded.')
def train():
    """CLI entry point: train the image classifier, evaluate it on the test
    split, save the best checkpoint, and plot the training history."""
    cfg = Config()
    # Dump the model configuration for the log.
    cfg.dump()

    parser = argparse.ArgumentParser(description='图片分类模型训练')
    parser.add_argument(
        '--resume_checkpoint',
        action='store',
        type=str,
        default='model/checkpoint.pth',
        help='从模型的checkpoint恢复模型,并继续训练,如果resume_checkpoint这个参数提供'
             '这些参数将忽略--arch, --learning_rate, --hidden_units, and --drop_p')
    args = parser.parse_args()

    # Load the data.
    dataloaders, class_to_idx = load_data(cfg.data_directory)

    resume_path = args.resume_checkpoint
    if resume_path and os.path.exists(resume_path):
        # Resume training from an existing checkpoint.
        print('resume_checkpoint已存在,开始加载模型')
        model, optimizer, epoch, history = load_checkpoint(
            checkpoint_path=resume_path, load_optimizer=True, gpu=cfg.cuda)
        start_epoch = epoch + 1
    else:
        # Build a fresh model and optimizer.
        print('resume_checkpoint未设置或模型文件不存在,创建新的模型')
        model = create_model(
            arch=cfg.arch,
            class_to_idx=class_to_idx,
            hidden_units=cfg.hidden_units,
            drop_p=cfg.dropout)
        optimizer = create_optimizer(model=model, lr=cfg.learning_rate)
        start_epoch = 1
        history = None

    # Train the model.
    history, best_epoch = train_model(
        dataloaders=dataloaders,
        model=model,
        optimizer=optimizer,
        gpu=cfg.cuda,
        start_epoch=start_epoch,
        epochs=cfg.epochs,
        train_history=history)

    # Evaluate on the held-out test split.
    test_acc = test_model(dataloader=dataloaders['test'], model=model, gpu=cfg.cuda)
    print(f'模型在测试集上的准确率是 {(test_acc * 100):.2f}%')

    # Persist the best checkpoint.
    save_checkpoint(
        save_path=cfg.save_path + cfg.save_name,
        epoch=best_epoch,
        model=model,
        optimizer=optimizer,
        history=history)

    # Plot training/validation curves.
    plot_history(history)
def _construct_and_fill_model(self):
    """Construct the classifier and initialize its weights.

    When the model directory is empty the model keeps its default weights;
    otherwise weights are loaded either for transfer learning (final 'fc'
    layer skipped) or to strictly continue training.
    """
    progress_dummy = sly.Progress('Building model:', 1)
    progress_dummy.iter_done_report()

    self.model = create_model(self.num_layers,
                              n_cls=len(self.classification_tags_sorted),
                              device_ids=self.device_ids)

    # Guard clause: nothing to load from an empty model dir.
    if sly.fs.dir_empty(sly.TaskPaths.MODEL_DIR):
        logger.info('Weights will not be inited.')
        # @TODO: add random init (m.weight.data.normal_(0, math.sqrt(2. / n))
        return

    wi_type = self.config['weights_init_type']
    ewit = {'weights_init_type': wi_type}
    logger.info('Weights will be inited from given model.', extra=ewit)

    weights_rw = WeightsRW(sly.TaskPaths.MODEL_DIR)
    if wi_type == TRANSFER_LEARNING:
        # Skip the final fc layer so a different class count can be trained.
        self.model = weights_rw.load_for_transfer_learning(
            self.model, ignore_matching_layers=['fc'], logger=logger)
    elif wi_type == CONTINUE_TRAINING:
        self.model = weights_rw.load_strictly(self.model)

    logger.info('Weights are loaded.', extra=ewit)
def train():
    """Train the seq2seq model on the Data-provided splits.

    Validates every FLAGS.check_per_epoches epochs, checkpoints the best
    (lowest) cumulative test loss, and stops early after 50 epochs without
    improvement.
    """
    print(train_data_path, save_model_dir, FLAGS.hidden_dim)

    # Load train/test splits.
    dataset = Data(FLAGS.batch_size, FLAGS.encoder_size, FLAGS.decoder_size,
                   train_data_path, test_data_path)
    train_X, train_y = dataset.train_inputs, dataset.train_labels
    test_X, test_y = dataset.test_inputs, dataset.test_labels

    tolerance_count = 0
    checkpoint_dir = os.path.join(
        save_model_dir,
        "%dhidden_%ddecoder_bestmodel.ckpt" % (FLAGS.hidden_dim, FLAGS.decoder_size))

    with tf.Session() as sess:
        global_test_error = 1000000
        model = create_model(sess, FLAGS.encoder_size, FLAGS.decoder_size,
                             FLAGS.hidden_dim, FLAGS.input_dim,
                             FLAGS.output_dim, load_model, checkpoint_dir)

        # Training loop.
        for epoch in range(FLAGS.epoches):
            st = time.time()
            epoch_loss = 0.0
            # Floor division instead of int(a / b): same result, no float detour.
            total_step = train_X.shape[0] // FLAGS.batch_size
            for step in range(total_step):
                encode_inputs, decode_inputs = model.get_batch(
                    train_X, train_y, FLAGS.batch_size, step)
                step_loss, predict_outputs = model.step(
                    sess, encode_inputs, decode_inputs,
                    FLAGS.encoder_size, FLAGS.decoder_size, is_training=True)
                epoch_loss += step_loss
                if step % 20 == 0:
                    print('train(step:%d/%d epoch:%d/%d)' % (
                        step + 1, total_step, epoch + 1, FLAGS.epoches), '\t',
                        predict_outputs[0][0], '\t', decode_inputs[0][0],
                        '\t loss:', step_loss)

            print("train loss %.6f in epoch=%d, time=%f" % (
                epoch_loss, epoch + 1, time.time() - st))

            # Periodic validation on the held-out split.
            if (epoch + 1) % FLAGS.check_per_epoches == 0:
                print(" validation (epoch:%d/%d)" % (epoch + 1, FLAGS.epoches))
                test_loss = 0.0
                for step_test in range(len(test_X) // FLAGS.batch_size):
                    encode_inputs, decode_inputs = model.get_batch(
                        test_X, test_y, FLAGS.batch_size, step_test)
                    step_loss, predict_outputs = model.step(
                        sess, encode_inputs, decode_inputs,
                        FLAGS.encoder_size, FLAGS.decoder_size, is_training=False)
                    test_loss += step_loss

                # Keep only the checkpoint with the lowest cumulative test loss.
                # Reuse checkpoint_dir instead of rebuilding the same path.
                if test_loss < global_test_error:
                    tolerance_count = 0
                    global_test_error = test_loss
                    model.saver.save(sess, checkpoint_dir)
                else:
                    tolerance_count += FLAGS.check_per_epoches

                print("test loss %.6f in epoch=%d" % (test_loss, epoch + 1))
                print("global min test loss %.6f in epoch=%d" % (
                    global_test_error, epoch + 1))
                # Early stop after 50 epochs without improvement.
                if tolerance_count >= 50:
                    break

        print('The final final final global min test error: %f' % global_test_error)
# Create the TF session used for training/evaluation.
sess = tf.Session(config=tf_config)

# Load the cached vocabularies if present, otherwise build them from scratch.
if os.path.isfile(config.necessary):
    with open(config.necessary, 'rb') as f:
        word2idx, pumsa2idx, lemma2idx, char2idx, label2idx, idx2label = pickle.load(f)
else:
    word2idx, pumsa2idx, lemma2idx, char2idx, label2idx, idx2label = get_necessary()

# Optional pre-trained word embeddings.
word_embedding_matrix = (load_word_embedding_matrix(word2idx)
                        if config.pretrained_embeddings else None)

logger.info("Now creating Model...")
SRL_Model = create_model(Model, logger, word2idx, pumsa2idx, char2idx,
                         label2idx, lemma2idx, word_embedding_matrix)

if config.mode == "train":
    # ELMo embeddings are only needed during training.
    ELMo_dict, context_embeddings_op, ELMo_context, ELMo_ids = load_ELMo()
    sess.run(tf.global_variables_initializer())

    # Load and index the datasets.
    train_dataset = load_data(config.train_path)
    test_dataset = load_data(config.test_path)
    train_data = prepare_dataset(train_dataset, word2idx, pumsa2idx, char2idx,
                                 lemma2idx, label2idx, ELMo_dict)
    test_data = prepare_dataset(test_dataset, word2idx, pumsa2idx, char2idx,
                                lemma2idx, label2idx, ELMo_dict)
    print("%i / %i sentences in train / dev " % (len(train_data), len(test_data)))

    train_manager = BatchManager(train_data, label2idx)
    test_manager = BatchManager(test_data, label2idx)
# elmo_dict = load_elmo_dict(config["elmo_dict"]) elmo_dict = load_elmo_dict(FLAGS.elmo_dict) else: elmo_dict = None config["num_chars"] = len(char_to_id) config["num_words"] = len(word_to_id) config["num_pumsas"] = len(pumsa_to_id) config["num_tags"] = len(tag_to_id) logger.info("Now creating Model...") model = create_model(Model, FLAGS.ckpt_path, config, logger, word_to_id, pumsa_to_id, char_to_id, tag_to_id, embedding_matrix, ner_morph_tag=ner_morph_tag) if config['elmo']: elmo_dict = load_elmo_dict(FLAGS.elmo_dict) ELMP_model = BidirectionalLanguageModel(config["elmo_options"], config["elmo_weights"]) elmo_ids = tf.placeholder(tf.int32, shape=[None, None, None], name='elmo_ids') context_embeddings_op = ELMP_model(elmo_ids) elmo_context = weight_layers('input', context_embeddings_op, l2_coef=0.0)
# TensorFlow session configuration: let GPU memory grow on demand.
tf_config = tf.ConfigProto()
tf_config.gpu_options.allow_growth = True
sess = tf.Session(config=tf_config)

# Model configuration and logging setup.
make_path(FLAGS)
config = config_model()
save_config(config, FLAGS.config_file)
log_path = os.path.join("log", FLAGS.log_file)
logger = get_logger(log_path)
print_config(config, logger)

# Build the model and register it with NSML.
model = create_model(sess, Model, FLAGS.ckpt_path, config, logger)
bind_model(sess, FLAGS)
if FLAGS.pause and IS_ON_NSML:
    nsml.paused(scope=locals())

if FLAGS.mode == 'train':
    dataset = data_loader(FLAGS.DATASET_PATH)
    # Hold out the last 3000 samples as the dev split.
    train_dataset, dev_dataset = dataset[:-3000], dataset[-3000:]

    # Build the character vocabulary from the training split.
    _c, char_to_id, id_to_char = char_mapping(train_dataset, FLAGS.lower)
    # Tag frequencies plus tag->id and id->tag dictionaries.
    _, tag_to_id, id_to_tag = tag_mapping(train_dataset)
#%% DATA_PATH = 'data/' INPUT_SHAPE = (32, 32, 3) N_CLASS = 2 EPOCHS = 50 BATCH_SIZE = 10 OPTIMIZER = "adam" LOSS = "categorical_crossentropy" #%% x_train, y_train, x_test, y_test = load_train_test(DATA_PATH) x_train = x_train / 255.0 x_test = x_test / 255.0 print(x_train.shape, y_train.shape) print(x_test.shape, y_test.shape) #%% prepare_inst = create_model(INPUT_SHAPE, N_CLASS, LOSS, OPTIMIZER, True) nn_model = prepare_inst.prepare_nn() nn = nn_model.fit(x_train, y_train, batch_size=BATCH_SIZE, epochs=EPOCHS, verbose=1, validation_data=(x_test, y_test), shuffle=True) save_result(nn, nn_model, DATA_PATH)
# Load data ############################################################
dataloaders, class_to_idx = load_data(args.data_directory)

# Create model #########################################################
if args.resume_checkpoint:
    # A checkpoint path was provided: resume from it.
    model, optimizer, epoch, history = load_checkpoint(
        checkpoint_path=args.resume_checkpoint,
        load_optimizer=True,
        gpu=gpu)
    start_epoch = epoch + 1
else:
    # No checkpoint: build a fresh model and optimizer.
    model = create_model(
        arch=args.arch,
        class_to_idx=class_to_idx,
        hidden_units=args.hidden_units,
        drop_p=args.drop_p)
    optimizer = create_optimizer(model=model, lr=args.learning_rate)
    start_epoch = 1
    history = None

# Train model ##########################################################
history, best_epoch = train_model(
    dataloaders=dataloaders,
    model=model,
    optimizer=optimizer,
    gpu=gpu,
    start_epoch=start_epoch,
    epochs=args.epochs,
    train_history=history)
# NOTE(review): the lines below are the tail of read_arguments(), whose
# header lies outside this view; indentation reconstructed accordingly.
    val_images = args.val_images
    model = args.model
    summarize = args.summarize
    normalize = args.n
    return model_path, out_path, create_new, epochs, batches, \
        input_shape, train_images, val_images, model, summarize, normalize


if __name__ == '__main__':
    # Parse all CLI options once at startup.
    model_path, out_path, create_new, epochs, batches, \
        input_shape, train_images, val_images, model, summarize, normalize = read_arguments()
    # Append the channel dimension (RGB).
    input_shape = (input_shape[0], input_shape[1], 3)

    if create_new:
        # As opencv2 considers the number of rows to be the second element of a shape tuple,
        # this is unfortunately necessary.
        model = model_utils.create_model(
            model, (input_shape[1], input_shape[0], input_shape[2]))
    else:
        model = model_utils.load_model(model_path)

    if summarize:
        # Summary-only mode: print the architecture and quit.
        model.summary()
        exit(0)

    model_utils.train_model(model, batches, epochs, input_shape[:2],
                            train_images_per_batch=train_images,
                            val_images_per_batch=val_images,
                            normalize_images=normalize)
    model_utils.save_model(model, out_path)
# Mixed-precision policy: compute in float16, keep variables in float32.
# BUG FIX: the policy definition was commented out while set_policy(policy)
# still referenced it, which raised NameError at import time.
policy = tf.keras.mixed_precision.experimental.Policy('mixed_float16')
tf.keras.mixed_precision.experimental.set_policy(policy)

# Synthetic dataset: pixel values scaled to [0, 1], labels normalized by image size.
dataset_size = 10000
image_size = 64
images, labels = create_data(dataset_size, image_size)
images = tf.cast(images, tf.float32) / 255
labels = tf.cast(labels, tf.float32) / image_size

batch_size = 256
train_dataset = tf.data.Dataset.from_tensor_slices(
    (images, labels)).shuffle(dataset_size).batch(batch_size)

model = create_model()
MSE = tf.keras.losses.MSE
optimizer = tf.keras.optimizers.Adam(1e-4)


# adding tf.function decorator lays out the graph statically
# rather than the default eager mode, the static graph is faster to compute at runtime
@tf.function
def train_step(images, labels):
    """Run a single optimization step and return the per-sample MSE loss."""
    with tf.GradientTape() as tape:
        predictions = model(images)
        # Keras losses take (y_true, y_pred); the original reversed order is
        # numerically identical for MSE, but the conventional order is clearer.
        loss = MSE(labels, predictions)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    return loss
def train():
    """CLI entry point: parse arguments, train the classifier, and
    optionally save a checkpoint.

    The model is saved only when --save_dir is provided; the checkpoint is
    written to <save_dir>/<checkpoint_name>.
    """
    # FIX: 'netwotk' typo; checkpoint_name/--save_dir help texts were
    # copy-pasted and described the wrong option.
    parser = argparse.ArgumentParser(description='Train network.')
    parser.add_argument('data_dir', help='train and test data directory.')
    parser.add_argument(
        'checkpoint_name',
        help='file name used when saving the model checkpoint')
    parser.add_argument('--architecture',
                        default='densenet121',
                        help='architecture to be used')
    parser.add_argument(
        '--save_dir',
        help='directory to save the model, if none provided the model is not saved',
        default='')
    parser.add_argument('--hidden_units',
                        type=int,
                        help='hidden units for the model, default is 512',
                        default=512)
    parser.add_argument(
        '--learningRate',
        type=float,
        help='Learning rate to train the model.0.001 is default',
        default=0.001)
    parser.add_argument('--epochs',
                        type=int,
                        help='epochs when the model is training',
                        default=2)
    args = parser.parse_args()

    data_dir = args.data_dir
    checkpoint_name = args.checkpoint_name
    architecture = args.architecture
    save_dir = args.save_dir
    hidden_units = args.hidden_units
    learningRate = args.learningRate
    epochs = args.epochs

    print('start training. data_dir is: ' + data_dir)
    train_dir = data_dir + '/train'
    test_dir = data_dir + '/test'
    traindata, trainloader = utils.load_data(True, train_dir)
    testdata, testloader = utils.load_data(False, test_dir)

    # Use GPU if it's available.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Define the model; only the classifier head is optimized.
    model = model_utils.create_model(hidden_units, architecture)
    criterion = nn.NLLLoss()
    optimizer = optim.Adam(model.classifier.parameters(), lr=learningRate)

    # Model training execution.
    model_utils.train_model(model, trainloader, testloader, criterion,
                            optimizer, device, epochs)

    # Save only when a save directory was given (idiomatic truthiness check).
    if save_dir:
        checkpoint_name = os.path.join(save_dir, checkpoint_name)
        model_utils.save_model(model, traindata, optimizer, checkpoint_name,
                               epochs, architecture)