def main():
    dls = dataloader.get_fold_loaders(k, BATCH_SIZE)
    for i, d in enumerate(dls):
        torch.save(d, "loaders/" + str(i) + ".pth")
    accs = []
    norm_confmats = []
    confmats = []
    best_state_dict_init = torch.load("models/inits/best.pth")
    for fold in range(k):
        mod = model.get_pretrained_model(layer_names=setting["layers"],
                                         type_init=setting["init"]).to(device)
        mod.load_state_dict(best_state_dict_init)
        optim = model.get_optimizer(mod, feature_extract=True,
                                    lr=setting["lr"], mom=setting["mom"])
        criterion = nn.CrossEntropyLoss()
        for e in range(EPOCHS):
            mod, valloss, _, confmat = traintest.trainepoch(
                mod, dls[fold], criterion, optim, device)
            valacc = get_acc_from_conf(confmat)
            if e == EPOCHS - 1:
                confmats.append(confmat)
                norm_confmat = normalize(confmat)
                norm_confmats.append(norm_confmat)
                accs.append(valacc)
        torch.save(mod.state_dict(), "models/folds/" + str(fold))
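# get_acc_from_conf and normalize are used above but not defined in this
# snippet. A minimal sketch of what they are assumed to do (overall accuracy as
# the diagonal mass of the confusion matrix over its total mass, and row-wise
# normalization so each true-class row sums to 1); the real definitions in the
# project may differ.
import torch


def get_acc_from_conf(confmat):
    # fraction of correctly classified samples: diagonal sum over total sum
    return (confmat.diag().sum() / confmat.sum()).item()


def normalize(confmat):
    # divide each row by the number of true samples of that class
    row_sums = confmat.sum(dim=1, keepdim=True).clamp(min=1)
    return confmat / row_sums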
def train(x_train, y_train, x_test, y_test, epochs, batch_size, task_id,
          population_id, ready_for_exploitation_False,
          ready_for_exploitation_True, active_False, active_True,
          connect_str_or_path, intervals_trained, seed_for_shuffling):
    # Train
    optimizer = get_optimizer()
    model = dnn_model()
    trainer = Trainer(model=model,
                      optimizer=optimizer,
                      x_train=x_train,
                      y_train=y_train,
                      x_test=x_test,
                      y_test=y_test,
                      epochs=epochs,
                      batch_size=batch_size,
                      task_id=task_id)
    checkpoint_path = (CHECKPOINT_STR % (population_id, task_id))
    if os.path.isfile(checkpoint_path):
        trainer.load_checkpoint(checkpoint_path)
    interval_is_odd = intervals_trained % 2 == 1
    score = None
    try:
        try:
            trainer.train(interval_is_odd, seed_for_shuffling)
            time.sleep(1)
        except LossIsNaN:
            print_with_time("Setting score to -1.")
            score = -1
        if score != -1:
            score = float(trainer.eval(intervals_trained))
            trainer.save_checkpoint(checkpoint_path)
        key_value_pairs = dict(
            intervals_trained=intervals_trained + 1,
            ready_for_exploitation=ready_for_exploitation_True,
            active=active_False,
            score=score)
        update_task(connect_str_or_path, USE_SQLITE, population_id, task_id,
                    key_value_pairs)
        del trainer.model
        del trainer
        tf.keras.backend.clear_session()
    except KeyboardInterrupt:
        # Don't save work.
        key_value_pairs = dict(active=active_False)
        update_task(connect_str_or_path, USE_SQLITE, population_id, task_id,
                    key_value_pairs)
        del trainer.model
        del trainer
        tf.keras.backend.clear_session()
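# LossIsNaN is caught above but not defined in this snippet. A minimal sketch
# of such a guard (the exception class plus a check a Trainer could run on each
# scalar batch loss); the actual Trainer internals are an assumption here.
import math


class LossIsNaN(Exception):
    """Raised when the training loss becomes NaN so the task can be scored -1."""


def check_loss(loss_value):
    # call after each training step with the scalar batch loss
    if math.isnan(loss_value):
        raise LossIsNaN("training loss is NaN")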
def main():
    args = parse_args()
    print("Params:")
    print(args)
    print()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    X = tf.placeholder(tf.float32, [17770, None], name='X')
    Y = tf.placeholder(tf.float32, [17770, None], name='Y')
    Yhat, weights = model.autoencoder(X, args.layers,
                                      keep_prob=(1.0 - args.dropout),
                                      constrained=args.constrained)
    YhatDev, weights = model.autoencoder(X, args.layers,
                                         constrained=args.constrained,
                                         weights=weights)
    loss = model.get_loss(Y, Yhat)
    loss_sum, loss_examples = model.get_test_loss(Y, Yhat)
    loss_sum_dev, loss_examples_dev = model.get_test_loss(Y, YhatDev)
    losses = (loss, loss_sum, loss_examples, loss_sum_dev, loss_examples_dev)
    optimizer = model.get_optimizer(args.optimizer_type, args.lr,
                                    args.momentum)

    if args.small_dataset:
        train_path = "../data/netflix/output_small_train"
        dev_path = "../data/netflix/output_small_dev"
        test_path = "../data/netflix/output_small_test"
    else:
        train_path = "../data/netflix/output_train"
        dev_path = "../data/netflix/output_dev"
        test_path = "../data/netflix/output_test"

    data_train = data_manager.Data(size=args.chunk_size,
                                   batch=args.batch_size, path=train_path)
    data_dev = data_manager.Data(size=args.chunk_size, batch=args.batch_size,
                                 path=dev_path, test=True)
    data_test = data_manager.Data(size=args.chunk_size, batch=args.batch_size,
                                  path=test_path, test=True)

    train_losses, eval_losses = model.train(
        data_train, data_dev, losses, optimizer, X, Y, Yhat,
        epochs=args.epochs, dense_refeeding=args.dense_refeeding)
    model.test(data_test, X, Y, YhatDev)

    t, = plt.plot([i + 1 for i in range(len(train_losses))], train_losses,
                  label="Train")
    e, = plt.plot([i + 1 for i in range(len(eval_losses))], eval_losses,
                  label="Dev")
    plt.legend(handles=[t, e])
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.show()

    print([i + 1 for i in range(len(train_losses))])
    print(train_losses)
    print([i + 1 for i in range(len(eval_losses))])
    print(eval_losses)
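# model.get_loss is not shown here. For a rating autoencoder of this kind it is
# commonly a masked MSE that only counts entries the user actually rated
# (Y != 0); the sketch below illustrates that assumption and is not necessarily
# the project's exact loss.
import tensorflow as tf


def masked_mse(Y, Yhat):
    # only rated entries (non-zero targets) contribute to the loss
    mask = tf.cast(tf.not_equal(Y, 0.0), tf.float32)
    num_rated = tf.maximum(tf.reduce_sum(mask), 1.0)
    return tf.reduce_sum(tf.square((Y - Yhat) * mask)) / num_rated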
def main(argv):
    options = argparser().parse_args(argv[1:])
    logger.info(f'train.py arguments: {options}')

    # word_labels are the labels assigned to words in the original
    # data, token_labeler.labels() the labels assigned to tokens in
    # the tokenized data. The two are differentiated to allow distinct
    # labels to be added e.g. to continuation wordpieces.
    word_labels = load_labels(options.labels)
    token_labeler = IobesTokenLabeler(word_labels)
    num_labels = len(token_labeler.labels())
    label_encoder = LabelEncoder(token_labeler.labels())
    logger.info(f'token labels: {token_labeler.labels()}')

    logger.info('loading pretrained model')
    pretrained_model, tokenizer, config = load_pretrained(
        options.model_name, cache_dir=options.cache_dir)
    logger.info('pretrained model config:')
    logger.info(config)

    if options.max_seq_length > config.max_position_embeddings:
        raise ValueError(f'--max_seq_length {options.max_seq_length} not '
                         f'supported by model')
    seq_len = options.max_seq_length

    encode_tokens = lambda t: tokenizer.encode(t, add_special_tokens=False)
    document_loader = ConllLoader(tokenizer.tokenize,
                                  token_labeler.label_tokens,
                                  options.separator)
    example_generator = EXAMPLE_GENERATORS[options.examples](
        seq_len,
        Token(tokenizer.cls_token, is_special=True, masked=False),
        Token(tokenizer.sep_token, is_special=True, masked=False),
        Token(tokenizer.pad_token, is_special=True, masked=True),
        encode_tokens,
        label_encoder.encode)

    train_documents = document_loader.load(options.train_data)
    dev_documents = document_loader.load(options.dev_data)
    # containers instead of generators for statistics
    train_documents = list(train_documents)
    dev_documents = list(dev_documents)
    log_dataset_statistics('train', train_documents)
    log_dataset_statistics('dev', dev_documents)

    decoder = ViterbiDecoder(label_encoder.label_map)
    decoder.estimate_probabilities(train_documents)
    logger.info(f'init_prob:\n{decoder.init_prob}')
    logger.info(f'trans_prob:\n{decoder.trans_prob}')

    train_examples = example_generator.examples(train_documents)
    dev_examples = example_generator.examples(dev_documents)
    # containers instead of generators for len() and logging
    train_examples = list(train_examples)
    dev_examples = list(dev_examples)
    num_train_examples = len(train_examples)
    log_examples(train_examples, count=2)

    train_x, train_y = examples_to_inputs(train_examples)
    dev_x, dev_y = examples_to_inputs(dev_examples)

    ner_model = build_ner_model(pretrained_model, num_labels, seq_len)

    optimizer, lr_schedule = get_optimizer(
        options.lr,
        options.num_train_epochs,
        options.batch_size,
        options.warmup_proportion,
        num_train_examples,
    )

    ner_model.compile(
        optimizer=optimizer,
        loss='sparse_categorical_crossentropy',
        sample_weight_mode='temporal',    # TODO is this necessary?
        metrics=['sparse_categorical_accuracy'])
    logger.info('ner model:')
    ner_model.summary(print_fn=logger.info)

    lr_history = LRHistory(lr_schedule)
    history = ner_model.fit(train_x, train_y,
                            epochs=options.num_train_epochs,
                            batch_size=options.batch_size,
                            validation_data=(dev_x, dev_y),
                            callbacks=[lr_history])
    for k, v in history.history.items():
        logger.info(f'{k} history: {v}')
    logger.info(f'lr history: {lr_history.by_epoch}')

    dev_predictions = ner_model.predict(dev_x, verbose=1,
                                        batch_size=options.batch_size)

    assert len(dev_examples) == len(dev_predictions)
    for example, preds in zip(dev_examples, dev_predictions):
        assert len(example.tokens) == len(preds)
        for pos, (token, pred) in enumerate(zip(example.tokens, preds)):
            token.predictions.append((pos, pred))

    documents = unique(t.document for e in dev_examples
                       for t in e.tokens if not t.is_special)
    check_predictions(documents)

    for n, r in evaluate_assign_labels_funcs(documents, label_encoder).items():
        print(f'{n}: prec {r.prec:.2%} rec {r.rec:.2%} f {r.fscore:.2%}')

    summarize_predictions = PREDICTION_SUMMARIZERS[options.summarize_preds]
    assign_labels = LABEL_ASSIGNERS[options.assign_labels]
    for document in documents:
        summarize_predictions(document)
        assign_labels(document, label_encoder)

    for n, r in evaluate_viterbi(documents, decoder.init_prob,
                                 decoder.trans_prob, label_encoder).items():
        print(f'{n}: prec {r.prec:.2%} rec {r.rec:.2%} f {r.fscore:.2%}')

    for document in documents:
        assign_labels(document, label_encoder)    # greedy

    print(conlleval_report(documents))
    if options.output_file is not None:
        with open(options.output_file, 'w') as out:
            write_conll(documents, out=out)

    if options.ner_model_dir is not None:
        save_ner_model(options.ner_model_dir, ner_model, decoder, tokenizer,
                       word_labels, config)

    return 0
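# LRHistory is used above but not part of this snippet. A minimal sketch of a
# Keras callback with the interface the code relies on (constructed from the
# learning-rate schedule, exposing .by_epoch), assuming lr_schedule is callable
# on the optimizer step; the project's real implementation may differ.
import tensorflow as tf


class LRHistory(tf.keras.callbacks.Callback):
    def __init__(self, lr_schedule):
        super().__init__()
        self.lr_schedule = lr_schedule
        self.by_epoch = []

    def on_epoch_end(self, epoch, logs=None):
        # record the learning rate at the optimizer's current step
        lr = self.lr_schedule(self.model.optimizer.iterations)
        self.by_epoch.append(float(tf.keras.backend.get_value(lr)))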
def __init__(self, net, dname, dropout, l_r, loss, optimizer, scheduler,
             size, batch_size, n_workers, augm_config, save_dir, mixup_coeff,
             cutout_params, total_epochs, SRV, classes=[[0], [1]],
             pretrained=True, no_logs=False, optimize_temp_scal=False,
             drop_last=True, copy_into_tmp=False, pretrained_isic=False):
    # Hyper-parameters
    self.net = net
    self.dropout = dropout
    self.dname = dname
    if classes is None:
        self.classes = [[0], [1]]
    elif len(classes) == 1:
        self.classes = [[c] for c in classes[0]]
    else:
        self.classes = classes
    self.num_classes = len(self.classes)
    self.learning_rate = l_r
    self.lossname = loss
    self.optname = optimizer
    self.schedname = scheduler
    self.size = size
    self.batch_size = batch_size
    self.n_workers = n_workers
    self.augm_config = augm_config
    self.pretrained = pretrained
    self.save_dir = save_dir
    self.best_auc = 0.0
    self.mixup_coeff = mixup_coeff
    self.cutout_nholes = cutout_params[0]
    self.cutout_pad_size = cutout_params[1]
    self.SRV = SRV
    self.no_logs = no_logs
    self.optimize_temp_scal = optimize_temp_scal
    self.copy_into_tmp = copy_into_tmp
    self.pretrained_isic = pretrained_isic

    self.nname = self.net + '_ISIC2019' + ('_pretrained'
                                           if self.pretrained_isic else '')
    if self.dropout:
        self.nname = 'dropout_' + self.nname

    self.n = get_model(self.net, self.pretrained, self.num_classes,
                       self.dropout, self.size)

    self.temp_scal_model = None
    if optimize_temp_scal:
        self.temp_scal_model = TemperatureScaling().to(
            'cuda')  # no wrapping for efficiency in training

    self.data_loader, self.test_data_loader, self.valid_data_loader = get_dataloader(
        dname=self.dname,
        size=self.size,
        dataset_classes=self.classes,
        SRV=self.SRV,
        batch_size=self.batch_size,
        n_workers=self.n_workers,
        augm_config=self.augm_config,
        cutout_params=cutout_params,
        drop_last_flag=drop_last,
        copy_into_tmp=self.copy_into_tmp)

    self.criterion = get_criterion(self.lossname, [[0], [1]])  # self.classes
    self.optimizer = get_optimizer(self.n, self.learning_rate, self.optname)
    self.scheduler = get_scheduler(self.optimizer, self.schedname)

    # to measure calibration stuff
    predictions_train = torch.zeros(len(self.data_loader.dataset),
                                    self.num_classes).float()
    labels_train = torch.zeros(len(self.data_loader.dataset), ).long()
    predictions_valid = torch.zeros(len(self.valid_data_loader.dataset),
                                    self.num_classes).float()
    labels_valid = torch.zeros(len(self.valid_data_loader.dataset), ).long()
    predictions_test = torch.zeros(len(self.test_data_loader.dataset),
                                   self.num_classes).float()
    labels_test = torch.zeros(len(self.test_data_loader.dataset), ).long()
    self.calibration_variables = [[predictions_train, labels_train],
                                  [predictions_valid, labels_valid],
                                  [predictions_test, labels_test]]

    if mixup_coeff > 0.0:
        self.data_loader = [self.data_loader]
        dl, _, _ = get_dataloader(dname=self.dname,
                                  size=self.size,
                                  SRV=self.SRV,
                                  batch_size=self.batch_size,
                                  n_workers=self.n_workers,
                                  augm_config=self.augm_config,
                                  cutout_params=cutout_params)
        self.data_loader.append(dl)

    # logger
    if not self.no_logs:
        model_log_dir = os.path.join(
            self.save_dir,
            self.get_model_filename(total_epochs, classes=True) +
            '_logger.log')
        logging.basicConfig(filename=model_log_dir, level=logging.INFO)
        self.logger = logging
def exploit_and_explore(connect_str_or_path, population_id):
    global np
    intervals_trained_col = get_col_from_populations(
        connect_str_or_path, USE_SQLITE, population_id, "intervals_trained")
    intervals_trained_col = np.array(intervals_trained_col)
    if not np.all(intervals_trained_col == intervals_trained_col[0]):
        msg = """The exploiter seems to be exploiting before all the models
                 have finished training. Check for bad race conditions with
                 respect to the database."""
        raise Exception(msg)
    # Sorted by scores, desc
    task_ids, scores = get_task_ids_and_scores(connect_str_or_path,
                                               USE_SQLITE, population_id)
    print_with_time("Exploiting interval %s. Best score: %.4f" %
                    (intervals_trained_col[0] - 1, max(scores)))
    seed_for_shuffling = np.random.randint(10 ** 5)
    fraction = 0.20
    cutoff = int(np.ceil(fraction * len(task_ids)))
    top_ids = task_ids[:cutoff]
    bottom_ids = task_ids[len(task_ids) - cutoff:]
    nonbottom_ids = task_ids[:len(task_ids) - cutoff]
    for bottom_id in bottom_ids:
        top_id = np.random.choice(top_ids)
        model = dnn_model()
        optimizer = get_optimizer()
        top_trainer = Trainer(model=model, optimizer=optimizer)
        top_checkpoint_path = (CHECKPOINT_STR % (population_id, top_id))
        top_trainer.load_checkpoint(top_checkpoint_path)
        model = dnn_model()
        optimizer = get_optimizer()
        bot_trainer = Trainer(model=model, optimizer=optimizer)
        bot_checkpoint_path = (CHECKPOINT_STR %
                               (population_id, bottom_id))  # TODO BUG
        bot_trainer.load_checkpoint(bot_checkpoint_path)
        bot_trainer.exploit_and_explore(top_trainer, HYPERPARAM_NAMES)
        bot_trainer.save_checkpoint(bot_checkpoint_path)
        key_value_pairs = dict(
            ready_for_exploitation=ready_for_exploitation_False,
            score=None,
            seed_for_shuffling=seed_for_shuffling)
        update_task(connect_str_or_path, USE_SQLITE, population_id,
                    bottom_id, key_value_pairs)
        # free the pair of models loaded for this exploitation step
        del top_trainer.model
        del bot_trainer.model
        del top_trainer
        del bot_trainer
        tf.keras.backend.clear_session()
    for nonbottom_id in nonbottom_ids:
        key_value_pairs = dict(
            ready_for_exploitation=ready_for_exploitation_False,
            seed_for_shuffling=seed_for_shuffling)
        update_task(connect_str_or_path, USE_SQLITE, population_id,
                    nonbottom_id, key_value_pairs)
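# bot_trainer.exploit_and_explore is called above but its body is not shown. A
# minimal sketch of the usual PBT step it is assumed to perform (copy the
# better worker's hyperparameters, then perturb each named hyperparameter by a
# random factor); the argument and attribute names here are illustrative only.
import numpy as np


def exploit_and_explore_sketch(top_hyperparams, hyperparam_names,
                               perturb_factors=(0.8, 1.2)):
    """Return an exploited-and-explored copy of the top worker's hyperparameters."""
    new_hyperparams = dict(top_hyperparams)   # exploit: copy the better config
    for name in hyperparam_names:             # explore: perturb each value
        new_hyperparams[name] *= np.random.choice(perturb_factors)
    return new_hyperparams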
def __init__(self, net, dname, dropout, l_r, loss, optimizer, scheduler,
             size, batch_size, n_workers, augm_config, save_dir,
             cutout_params, total_epochs,
             classes=[[0], [1], [2], [3], [4], [5], [6], [7]],
             pretrained=True, no_logs=False, optimize_temp_scal=False,
             drop_last=True):
    # Hyper-parameters
    self.net = net
    self.dropout = dropout
    self.dname = dname
    if classes is None:
        self.classes = [[0], [1], [2], [3], [4], [5], [6], [7]]
    elif len(classes) == 1:
        self.classes = [[c] for c in classes[0]]
    else:
        self.classes = classes
    self.num_classes = len(self.classes)
    self.learning_rate = l_r
    self.lossname = loss
    self.optname = optimizer
    self.schedname = scheduler
    self.size = size
    self.batch_size = batch_size
    self.n_workers = n_workers
    self.augm_config = augm_config
    self.pretrained = pretrained
    self.save_dir = save_dir
    self.best_acc = 0.0
    self.cutout_nholes = cutout_params[0]
    self.cutout_pad_size = cutout_params[1]
    self.no_logs = no_logs
    self.optimize_temp_scal = optimize_temp_scal

    self.nname = self.net + '_ISIC2019'
    if self.dropout:
        self.nname = 'dropout_' + self.nname

    self.n = get_model(self.net, self.pretrained, self.num_classes,
                       self.dropout, self.size)

    self.temp_scal_model = None
    if optimize_temp_scal:
        self.temp_scal_model = TemperatureScaling().to(
            'cuda')  # no wrapping for efficiency in training

    self.data_loader, self.test_data_loader, self.valid_data_loader = get_dataset(
        dname=self.dname,
        size=self.size,
        dataset_classes=self.classes,
        batch_size=self.batch_size,
        n_workers=self.n_workers,
        augm_config=self.augm_config,
        cutout_params=cutout_params,
        drop_last_flag=drop_last)

    self.criterion = get_criterion(self.lossname, self.classes)
    self.optimizer = get_optimizer(self.n, self.learning_rate, self.optname)
    self.scheduler = get_scheduler(self.optimizer, self.schedname)

    # to measure calibration stuff
    predictions_train = torch.zeros(len(self.data_loader.dataset),
                                    self.num_classes).float()
    labels_train = torch.zeros(len(self.data_loader.dataset), ).long()
    predictions_valid = torch.zeros(len(self.valid_data_loader.dataset),
                                    self.num_classes).float()
    labels_valid = torch.zeros(len(self.valid_data_loader.dataset), ).long()
    predictions_test = torch.zeros(len(self.test_data_loader.dataset),
                                   self.num_classes).float()
    labels_test = torch.zeros(len(self.test_data_loader.dataset), ).long()
    self.calibration_variables = [[predictions_train, labels_train],
                                  [predictions_valid, labels_valid],
                                  [predictions_test, labels_test]]

    # logger
    if not self.no_logs:
        model_log_dir = os.path.join(
            self.save_dir,
            self.nname + '_epoch.' + str(total_epochs) + '_augmentidx.' +
            str(self.augm_config) + '_cutout.holes' + str(self.cutout_nholes) +
            '.pad.' + str(self.cutout_pad_size) + '.classes.' +
            str(self.classes) + '_logger.log')
        logging.basicConfig(filename=model_log_dir, level=logging.INFO)
        self.logger = logging
def train_neural_network():
    tf.reset_default_graph()
    with tf.Session() as sess:
        sess = tf_debug.LocalCLIDebugWrapperSession(sess)

        # initialize lookup table
        table = initialize_lookup_table()

        train_feature_filenames, train_label_filenames = get_filenames()
        with tf.name_scope('raw_inputs'):
            features, raw_labels = input.getFiles(train_feature_filenames,
                                                  train_label_filenames)
        with tf.name_scope('processed_labels'):
            labels = preprocess_labels(raw_labels, table)

        output, test_output, test_features, test_labels = model.create_model(
            features, labels)

        with tf.name_scope('loss'):
            loss = model.get_loss(output, labels)
        with tf.name_scope('training_accuracy'):
            training_accuracy = model.compute_accuracy(output, labels)
        with tf.name_scope('dev_accuracy'):
            dev_accuracy = model.compute_accuracy(test_output, test_labels)

        train_step = model.get_optimizer(loss)
        training_fetches = [
            features, raw_labels, labels, output, loss, training_accuracy,
            train_step
        ]

        # initialize variables
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())

        # add graph summary for tensorboard
        writer = tf.summary.FileWriter(constants.TENSORBOARD_DIR, sess.graph)

        # start queue runner for data loading
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)

        # get dev features
        dev_features, dev_labels = sess.run([features, labels])

        # check if we received the labels correctly or not
        print(dev_labels)

        for epoch in range(1, constants.EPOCHS + 1):
            for batch in range(1, constants.NUM_BATCHES + 1):
                # train the model
                (model_features, model_raw_labels, model_labels, model_output,
                 model_loss, model_accuracy, _) = sess.run(training_fetches)
                print("Epoch {}/{} ; Batch {}/{} ; Accuracy {} ; Loss {}".format(
                    epoch, constants.EPOCHS, batch, constants.NUM_BATCHES,
                    model_accuracy, model_loss))
                print(model_output)

                # evaluate the accuracy
                if batch % constants.TEST_PERIOD == 0:
                    mdev_accuracy = sess.run(dev_accuracy,
                                             feed_dict={
                                                 test_features: dev_features,
                                                 test_labels: dev_labels
                                             })
def run(setting, n, save_dir, folder, early_stop=True, split=0.75,
        init_dict=None):
    name = convert_to_name(setting)
    model_save_dir = save_dir + '/'
    history = {
        "loss": [],
        "acc": [],
        "normacc": [],
        "ipacc": [],
        "npacc": [],
        "confmat": [],
        "best_avg": 0
    }
    mod = model.get_pretrained_model(layer_names=setting["layers"],
                                     type_init=setting["init"]).to(device)
    if init_dict is not None:
        mod.load_state_dict(init_dict)
    optim = model.get_optimizer(mod, feature_extract=True, lr=setting["lr"],
                                mom=setting["mom"])
    criterion = nn.CrossEntropyLoss()
    Path(model_save_dir + name + "/" + n).mkdir(parents=True, exist_ok=True)
    torch.save(mod.state_dict(), model_save_dir + name + "/" + n + '/epoch_0')
    stop = False
    if early_stop:
        dataloaders = dataloader.get_loaders(BATCH_SIZE, split)
        while not stop:
            print(stopcrit.checks)
            mod, valloss, valacc, confmat = traintest.trainepoch(
                mod, dataloaders, criterion, optim, device)
            #normalacc, ipacc, npacc = accs_from_confmat(confmat)
            history["loss"].append(valloss)
            history["acc"].append(valacc)
            #history["normacc"].append(normalacc)
            #history["ipacc"].append(ipacc)
            #history["npacc"].append(npacc)
            history["confmat"].append(confmat)
            stop = stopcrit.check(valacc, mod.state_dict())
    else:
        dataloaders = dataloader.get_loaders(BATCH_SIZE, split)
        for epoch in range(EPOCHS):
            validate = split != 1.0
            mod, valloss, valacc, confmat = traintest.trainepoch(
                mod, dataloaders, criterion, optim, device, validate)
            if valloss is not None:
                #normalacc, ipacc, npacc = accs_from_confmat(confmat)
                history["loss"].append(valloss)
                history["acc"].append(valacc)
                #history["normacc"].append(normalacc)
                #history["ipacc"].append(ipacc)
                #history["npacc"].append(npacc)
                history["confmat"].append(confmat)
                stop = stopcrit.check(valacc, mod.state_dict())
    if split != 1.0:
        history["best_avg"] = stopcrit.last_avg
        torch.save(stopcrit.best_model_dict,
                   model_save_dir + name + "/" + n + '/epoch_' +
                   str(stopcrit.best_check))
        plot_run(name, n, history, folder)
        best_acc = stopcrit.best_val
        best_epoch = stopcrit.best_check
        stopcrit.reset()
    else:
        torch.save(mod.state_dict(),
                   model_save_dir + name + "/" + n + "/epoch_" + str(EPOCHS))
        best_acc = None
        best_epoch = None
    return history, best_acc, best_epoch
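# stopcrit is a module-level stopping criterion used above but not defined in
# this snippet. A minimal sketch of the interface the code relies on (checks,
# check(), last_avg, best_val, best_check, best_model_dict, reset()): a running
# window of validation accuracies with the best checkpoint kept aside. The
# exact rule in the original project may differ.
import copy


class MovingAverageStop:
    def __init__(self, window=5, patience=3):
        self.window = window
        self.patience = patience
        self.reset()

    def reset(self):
        self.checks = 0
        self.history = []
        self.best_val = 0.0
        self.best_check = 0
        self.best_model_dict = None
        self.last_avg = 0.0
        self.bad_checks = 0

    def check(self, valacc, state_dict):
        # record one validation result; return True when training should stop
        self.checks += 1
        self.history.append(valacc)
        if valacc > self.best_val:
            self.best_val = valacc
            self.best_check = self.checks
            self.best_model_dict = copy.deepcopy(state_dict)
        avg = sum(self.history[-self.window:]) / min(len(self.history),
                                                     self.window)
        self.bad_checks = self.bad_checks + 1 if avg <= self.last_avg else 0
        self.last_avg = avg
        return self.bad_checks >= self.patience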
def main():
    if len(sys.argv) != 7:
        print("Usage: {0} <data directory> <hidden layer size> "
              "<min song length> <steps> <epochs> <batch_size>".format(
                  sys.argv[0]))
        exit(2)
    path = sys.argv[1]
    hidden_size = int(sys.argv[2])
    min_len = int(sys.argv[3])
    steps = int(sys.argv[4])
    epochs = int(sys.argv[5])
    batch_size = int(sys.argv[6])

    all_songs = get_songs(path)
    print('Preprocessed Songs')
    total_songs = len(all_songs)
    input_size = all_songs[0].shape[1]
    output_size = input_size
    rnn_units = hidden_size
    learning_rate = 0.001
    keep_probability = 0.6
    disp = 1
    print(total_songs, input_size)
    print(all_songs[0].shape)

    model_inputs, model_targets, keep_prob, lr = model_placeholders(
        input_size, output_size, steps)
    parameters = model_parameters(output_size, hidden_size)  # w1, b1
    final_outputs, prediction = rnn_layer(model_inputs, parameters, rnn_units,
                                          keep_prob, steps)
    loss = get_loss(final_outputs, model_targets)
    optimizer = get_optimizer(loss, lr)
    accuracy = get_accuracy(model_targets, prediction)

    init = tf.global_variables_initializer()
    session = tf.Session()
    print('Start Training')
    with session as sess:
        sess.run(init)
        for epoch in range(epochs):
            inputs, targets = generate_batches(all_songs, batch_size, steps,
                                               input_size, output_size)
            feed_dict = {
                model_inputs: inputs,
                model_targets: targets,
                keep_prob: keep_probability,
                lr: learning_rate
            }
            sess.run(optimizer, feed_dict=feed_dict)
            if epoch % disp == 0 or epoch == 10:
                l, a = sess.run([loss, accuracy], feed_dict=feed_dict)
                s = 'Epoch: {}, Loss: {:.4f}, Accuracy: {:.3f} \n'.format(
                    epoch, l, a)
                logger(epoch, epochs, s=s)

        # Generate new midi files
        get_random = False
        idx = 11 if get_random else np.random.randint(total_songs)
        song = all_songs[idx][:steps].tolist()
        print('Sampling new music')
        for i in range(100):
            initial = np.array([song[-steps:]])
            sample = sess.run(prediction, feed_dict={model_inputs: initial})
            new_songs = sample_music(sample, output_size, song)
        sample_midi(new_songs, name='gen_1')
        sample_midi(all_songs[idx], name='base_1')
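# model_placeholders is not included in this snippet. A minimal sketch of the
# placeholders the rest of the script appears to expect (a batch of `steps`
# frames in, one frame out, plus scalar keep_prob and learning-rate feeds); the
# shapes are assumptions and may differ from the original helper.
import tensorflow as tf


def model_placeholders_sketch(input_size, output_size, steps):
    inputs = tf.placeholder(tf.float32, [None, steps, input_size],
                            name='inputs')
    targets = tf.placeholder(tf.float32, [None, output_size], name='targets')
    keep_prob = tf.placeholder(tf.float32, name='keep_prob')
    lr = tf.placeholder(tf.float32, name='lr')
    return inputs, targets, keep_prob, lr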
import tensorflow as tf

import input
import model

with tf.variable_scope("input"):
    filenames, labels = input.get_filenames_labels(12500, .90, True,
                                                   "../train_preprocessed2")
    x, y_ = input.input_pipeline(filenames, labels, 80)

with tf.variable_scope("model") as scope:
    y = model.model(x, True)

with tf.variable_scope("optimizer"):
    loss = model.get_loss(y, y_)
    optimizer = model.get_optimizer(loss)

with tf.variable_scope("error"):
    error = model.get_error(y, y_)

saver = tf.train.Saver()

with tf.variable_scope("summary"):
    logs_path = "../logs"
    merged_summary_op = model.get_summary_op(x, loss, error)

sv = tf.train.Supervisor(logdir="../logs",
                         init_op=tf.global_variables_initializer(),
                         summary_op=merged_summary_op,
                         saver=saver,
                         save_summaries_secs=60,
                         save_model_secs=600)
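# The script above only builds the graph and the Supervisor; a minimal sketch
# of how a training loop would typically consume it, assuming
# model.get_optimizer returns the training op and that 100000 steps is an
# arbitrary choice. sv.managed_session() handles initialization, checkpointing,
# and summary writing according to the arguments passed above.
with sv.managed_session() as sess:
    for step in range(100000):
        if sv.should_stop():
            break
        _, train_loss, train_error = sess.run([optimizer, loss, error])
        if step % 100 == 0:
            print("step %d: loss %.4f, error %.4f" %
                  (step, train_loss, train_error))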
def run_train():
    """Train CAPTCHA for a number of steps."""
    train_data = dataset.read_data_sets()
    with tf.Graph().as_default():
        images_placeholder, y_placeholder, z_placeholder = placeholder_inputs()
        d_logits_real, d_logits_fake = model.inference(images_placeholder,
                                                       z_placeholder,
                                                       y_placeholder)
        demo_noise = np.random.uniform(-1, 1, size=(100, 100))
        demo_label = get_demo_label()
        demo_img = model.generator(z_placeholder, y_placeholder, reuse=True)
        g_loss, d_loss = model.loss(d_logits_real, d_logits_fake)
        tf.summary.scalar('g_loss', g_loss)
        tf.summary.scalar('d_loss', d_loss)
        summary = tf.summary.merge_all()
        train_op = model.get_optimizer(g_loss, d_loss)
        saver = tf.train.Saver()
        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())
        sess = tf.Session()
        summary_writer = tf.summary.FileWriter(train_dir, sess.graph)
        sess.run(init_op)
        try:
            max_step = 100 * 70000 // batch_size
            for step in range(1, max_step):
                start_time = time.time()
                feed_dict = fill_feed_dict(train_data, images_placeholder,
                                           y_placeholder, z_placeholder)
                _, gloss_value, dloss_value = sess.run(
                    [train_op, g_loss, d_loss], feed_dict=feed_dict)
                summary_str = sess.run(summary, feed_dict=feed_dict)
                summary_writer.add_summary(summary_str, step)
                summary_writer.flush()
                duration = time.time() - start_time
                if step % 10 == 0:
                    print('>> Step %d run_train: g_loss = %.2f d_loss = %.2f (%.3f sec)'
                          % (step, gloss_value, dloss_value, duration))
                #-------------------------------
                if step % 100 == 0:
                    demo_result = sess.run(demo_img,
                                           feed_dict={
                                               z_placeholder: demo_noise,
                                               y_placeholder: demo_label
                                           })
                    save_images(demo_result, step)
                    print('>> %s Saving in %s' % (datetime.now(),
                                                  checkpoint_dir))
                    saver.save(sess, checkpoint_file, global_step=step)
        except KeyboardInterrupt:
            print('INTERRUPTED')
        finally:
            saver.save(sess, checkpoint_file, global_step=step)
            print('Model saved in file :%s' % checkpoint_dir)
            sess.close()
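# fill_feed_dict is not shown in this snippet. A minimal sketch of what it is
# assumed to do (draw one labelled batch from the dataset and pair it with
# fresh generator noise), assuming an MNIST-style next_batch interface and the
# module-level batch_size used above; the real helper may differ.
import numpy as np


def fill_feed_dict_sketch(data, images_pl, y_pl, z_pl):
    images, labels = data.next_batch(batch_size)
    noise = np.random.uniform(-1, 1, size=(batch_size, 100))
    return {images_pl: images, y_pl: labels, z_pl: noise}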