def main(training_from_scratch, args):
    """Build (or restore) a character-level model and train it.

    Parameters
    ----------
    training_from_scratch : bool
        True to preprocess the raw corpus and build a fresh model;
        False to resume from a saved checkpoint plus pickled artifacts.
    args : argparse.Namespace
        Expected attributes: filename, checkpoint_dir, checkpoint,
        minocc, epochs, initepochs.
    """
    if training_from_scratch:
        # Use a context manager so the file handle is closed
        # (the original `open(...).read()` leaked the handle).
        with open(args.filename, 'rb') as f:
            text = f.read().decode(encoding='utf-8')
        # note that we are replacing the text here
        text, char2idx, idx2char = preprocessing(
            text, args.checkpoint_dir, args.minocc)
        vocab_size = len(idx2char)
        config = Config(vocab_size, args.epochs)
        model = build_model(config)
    else:
        # Resume: restore the model and the pickled vocab/dataset artifacts.
        model = tf.keras.models.load_model(args.checkpoint)
        char2idx = unpickle(args.checkpoint_dir, 'char2idx')
        idx2char = unpickle(args.checkpoint_dir, 'idx2char')
        text = unpickle(args.checkpoint_dir, 'dataset')
        vocab_size = len(idx2char)
        config = Config(vocab_size, args.epochs, args.initepochs)
    # works because text is a list of words
    text_as_int = np.array([char2idx[c] for c in text])
    train_model(args.checkpoint_dir, text_as_int, model, config)
def train_lstm_model(train_iter, val_iter, test_iter, text, hidden_size, num_layers, dropout):
    """Fit an LSTM language model, reload the best checkpoint, and report
    perplexity on the validation and test splits."""
    lm = LSTMLanguageModel(text, hidden_size, num_layers, dropout).to(util.DEVICE)
    adam = torch.optim.Adam(lm.parameters(), lr=0.001)
    xent = nn.CrossEntropyLoss()

    # Training pass; util.train_model writes the best weights to disk.
    lm.train()
    lm.reset_contexts(train_iter.batch_size)
    util.train_model(lm, train_iter, val_iter, adam, xent)

    # Score with the best checkpoint, not the final-epoch weights.
    lm.load_state_dict(torch.load('best_model.pt'))
    lm.eval()

    lm.reset_contexts(val_iter.batch_size)
    val_perplexity = util.evaluate_perplexity(lm, val_iter, True)
    print('Validation perplexity: %s' % val_perplexity)

    lm.reset_contexts(test_iter.batch_size)
    test_perplexity = util.evaluate_perplexity(lm, test_iter, True)
    print('Test perplexity: %s' % test_perplexity)
def main(training_from_scratch, args):
    """Load a text corpus, build or restore a model, and train it.

    Parameters
    ----------
    training_from_scratch : bool
        True to build a fresh model; False to restore args.checkpoint.
    args : argparse.Namespace
        Expected attributes: filename, checkpoint_dir, checkpoint,
        epochs, initepochs.
    """
    ### opening the file ###
    # Context manager closes the handle (the original leaked it).
    with open(args.filename, 'rb') as f:
        text = f.read().decode(encoding='utf-8')
    vocab_size = len(set(text))
    config = Config(vocab_size, args.epochs, args.initepochs)

    ### quick sanity check of the corpus ###
    print('Length of text: {} characters'.format(len(text)))
    print(text[:250])

    ### creating vocab, converting text to long integer sequence ###
    char2idx, idx2char = create_vocab_from_file(text, args.checkpoint_dir)
    text_as_int = np.array([char2idx[c] for c in text])

    if training_from_scratch:
        model = build_model(config)
    else:
        model = tf.keras.models.load_model(args.checkpoint)
    train_model(args.checkpoint_dir, text_as_int, model, config)
def run_model() -> None:
    "Execute model according to the configuration"
    print('#' * 5, 'PARAMETERS', '#' * 5)
    print_args(ARGS)
    print('#' * 10, '\n\n')

    # Pick the model-builder and the matching dataset reader.
    build_fn, reader_type = common.get_modelfn_reader()
    reader = common.create_reader(reader_type)

    def make_optimiser(model: Model) -> torch.optim.Optimizer:
        # AdamW over all parameters with mild weight decay.
        return AdamW(model.parameters(), lr=1e-3, weight_decay=1e-3)

    # Create SAVE_FOLDER if it doesn't exist
    ARGS.SAVE_PATH.mkdir(exist_ok=True, parents=True)

    # Load the three splits through the same reader.
    splits = {
        name: load_data(data_path=path, reader=reader, pre_processed_path=pre)
        for name, path, pre in (
            ('train', ARGS.TRAIN_DATA_PATH, ARGS.TRAIN_PREPROCESSED_PATH),
            ('val', ARGS.VAL_DATA_PATH, ARGS.VAL_PREPROCESSED_PATH),
            ('test', ARGS.TEST_DATA_PATH, ARGS.TEST_PREPROCESSED_PATH),
        )
    }

    model = train_model(build_fn,
                        train_data=splits['train'],
                        val_data=splits['val'],
                        test_data=splits['test'],
                        save_path=ARGS.SAVE_PATH,
                        num_epochs=ARGS.NUM_EPOCHS,
                        batch_size=ARGS.BATCH_SIZE,
                        optimiser_fn=make_optimiser,
                        cuda_device=ARGS.CUDA_DEVICE,
                        sorting_keys=reader.keys)

    common.evaluate(model, reader, splits['test'])
    result = make_prediction(model, reader, verbose=False)
    common.error_analysis(model, splits['test'])

    print('Save path', ARGS.SAVE_PATH)
    cuda_device = 0 if is_cuda(model) else -1
    test_load(build_fn, reader, ARGS.SAVE_PATH, result, cuda_device)
import util import net if __name__ == '__main__': parser = util.default_parser('MLP Example') args = parser.parse_args() # get the dataset (default is MNIST) train, test = util.get_dataset(args.dataset) # initialize model model = net.ConvNet(n_filters=16, n_out=10) # train model util.train_model(model, train, test, args) # get test accuracy acc = util.accuracy(model, test, gpu=args.gpu) print 'Model accuracy: ', acc # generate and save C model as a header file model.generate_c('simple.h', train._datasets[0].shape[1:])
# TODO: Load a pre-trained network and configure model, criterion, optimizer, classifier = util.load_model( arguments.arch, arguments.hidden_units, arguments.output_size, arguments.learning_rate) print(model, criterion, optimizer) logging.info('Modelo configurado com sucesso!') #Use GPU if it's available device = util.choose_device(arguments.gpu) print(device) logging.info("%s selecionado.", device) # TODO: Train de model logging.info('Inicio do treinamento!') util.train_model(model, trainloader, validloader, arguments.epochs, criterion, optimizer, device) logging.info('Modelo treinado com sucesso!') # TODO: Do validation on the test set accuracy = util.accuracy_network(model, testloader, criterion, device) logging.info(f'Acurracy foi medido em {accuracy} com sucesso!') # TODO: Save the checkpoint file = util.save_checkpoint(model, train_data, optimizer, arguments.save_dir, arguments.arch, arguments.output_size, classifier, arguments.learning_rate, arguments.batch_size, arguments.epochs) logging.info("Arquivo checkpoint %s foi salvo.", file) logging.info('Fim da configuração do modelo. Parabéns!!!')
def run_experiment(experiment_config: Dict, save_weights: bool, gpu_ind: int, use_wandb: bool = True):
    """Run one training experiment described by a config dict.

    experiment_config keys: "dataset"/"dataset_args", "model",
    "network"/"network_args", and "train_args" (merged over
    DEFAULT_TRAIN_ARGS; must supply "batch_size" and "epochs").

    save_weights: save final weights to the model's canonical location.
    gpu_ind: which GPU to use (-1 for first available).
    use_wandb: sync the training run to wandb.
    """
    print(
        f"Running experiment with config {experiment_config} on GPU {gpu_ind}")

    # Resolve the dataset class by name and materialize the data.
    dataset_class_ = getattr(importlib.import_module("text_recognizer.datasets"),
                             experiment_config["dataset"])
    dataset_args = experiment_config.get("dataset_args", {})
    dataset = dataset_class_(**dataset_args)
    dataset.load_or_generate_data()
    print(dataset)

    # Resolve the model class and the network constructor the same way.
    model_class_ = getattr(importlib.import_module("text_recognizer.models"),
                           experiment_config["model"])
    network_fn_ = getattr(importlib.import_module("text_recognizer.networks"),
                          experiment_config["network"])
    network_args = experiment_config.get("network_args", {})
    model = model_class_(
        dataset_cls=dataset_class_,
        network_fn=network_fn_,
        dataset_args=dataset_args,
        network_args=network_args,
    )
    print(model)

    # Merge user train args over the defaults; record run metadata in-place.
    experiment_config["train_args"] = {
        **DEFAULT_TRAIN_ARGS,
        **experiment_config.get("train_args", {}),
    }
    experiment_config["experiment_group"] = experiment_config.get(
        "experiment_group", None)
    experiment_config["gpu_ind"] = gpu_ind

    train_args = experiment_config["train_args"]
    train_model(
        model,
        dataset,
        epochs=train_args["epochs"],
        batch_size=train_args["batch_size"],
        use_wandb=use_wandb,
    )
    score = model.evaluate(dataset.x_test, dataset.y_test)
    print(f"Test evaluation: {score}")

    if save_weights:
        model.save_weights()
def train():
    """Fit a gradient-boosting model on the clinical tables and persist it.

    Loads the tables from TRAIN_DIR, preprocesses the measurement table,
    trains via util.train_model, and pickles the fitted model to
    VOL_DIR/model.dat.
    """
    test_model = ensemble.GradientBoostingClassifier()
    (person_table, condition_occurrence_table,
     outcome_cohort_table, measurement_table) = util.load_data_set(TRAIN_DIR)
    measurement_table = util.preprocess_measurement(measurement_table)
    test_model = util.train_model(test_model, person_table,
                                  condition_occurrence_table,
                                  measurement_table, outcome_cohort_table)
    # Context manager closes the pickle file even on error
    # (the original opened it without ever closing it).
    with open(os.path.join(VOL_DIR, 'model.dat'), 'wb') as f:
        pickle.dump(test_model, f)

# data input (translated from Korean: 데이터 입력)
# ***** SET MODEL ***** # model = UNet(1, depth=5, merge_mode='concat').cuda(0) # Alternative implementation # ipdb.set_trace() model = UNet2(3, 2, learn_weights=args.learnWeights, softmax=False) # Kaggle notebook implementation model = nn.DataParallel(model).cuda() optimizer = torch.optim.Adam(model.parameters(), lr=0.2 * 1e-3) lossFunc = util.soft_dice_loss # lossFunc = util.soft_dice_weighted_loss # model = train_model(model, lossFunc, args.numEpochs) model = util.train_model(model, optimizer, lossFunc, dataloader, validdataloader, args) util.save_model(model, args.modelName) ############################################################################################################## #util.plot_results_for_images(model,dataloader) # ***** EVALUATION ******** if 0: testset = loadData.TestDataset(TEST_PATH, test_trans) testdataloader = t.utils.data.DataLoader(testset, num_workers=2, batch_size=1) # make predictions for all test samples
def main():
    """Train an image classifier from the 'Train' section of the config.

    Builds the model (optionally restoring a checkpoint), sets up
    dataloaders/optimizer/loss, trains via train_model, and saves the
    best-epoch state dict to '<savePath>-best.pt'.
    """
    cfgs = _init_()
    cfgs = cfgs['Train']
    model_name = cfgs['model_name']
    feature_extract = cfgs['feature_extract']
    num_classes = cfgs['num_classes']
    batch_size = cfgs['batch_size']
    pretrained = cfgs['pretrain']
    model_ft, input_size = initialize_model(model_name, num_classes,
                                            feature_extract,
                                            use_pretrained=pretrained)
    start_epoch = cfgs['start_epoch']
    if cfgs['loadPt'] is not None:  # idiom fix: compare to None with `is not`
        print('load pre model')
        checkpoint = torch.load(cfgs['loadPt'])
        # key[7:] strips a leading prefix of 7 chars — presumably 'module.'
        # from a DataParallel-saved checkpoint; confirm against the saver.
        state_dict = {}
        for key in checkpoint['model_state_dict'].keys():
            state_dict[key[7:]] = checkpoint['model_state_dict'][key]
        model_ft.load_state_dict(state_dict)
        start_epoch = checkpoint['epoch']
        print('start_epoch', start_epoch)
    print(model_ft)

    # Data augmentation and normalization for training
    # Just normalization for validation
    data_transforms = {
        'train': transforms.Compose([
            # transforms.RandomResizedCrop(input_size),
            transforms.Resize(256),
            transforms.RandomCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(cfgs['mean'], cfgs['std'])
        ]),
        'val': transforms.Compose([
            transforms.Resize(input_size),
            transforms.CenterCrop(input_size),
            transforms.ToTensor(),
            transforms.Normalize(cfgs['mean'], cfgs['std'])
        ]),
    }

    print("Initializing Datasets and Dataloaders...")
    # Create training and validation datasets
    #image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x), data_transforms[x]) for x in ['train', 'val']}
    image_datasets = {}
    image_datasets['train'] = TrainDataset(list_file=cfgs['trainFile'],
                                           transform=data_transforms['train'])
    image_datasets['val'] = TrainDataset(list_file=cfgs['valFile'],
                                         transform=data_transforms['val'])
    # Create training and validation dataloaders
    dataloaders_dict = {
        x: torch.utils.data.DataLoader(image_datasets[x],
                                       batch_size=batch_size,
                                       shuffle=True,
                                       num_workers=12)
        for x in ['train', 'val']
    }

    # Detect if we have a GPU available
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs
        model_ft = nn.DataParallel(model_ft)
    # Send the model to GPU
    model_ft = model_ft.to(device)
    print(device)

    # Gather the parameters to be optimized/updated in this run. If we are
    # finetuning we will be updating all parameters. However, if we are
    # doing feature extract method, we will only update the parameters
    # that we have just initialized, i.e. the parameters with requires_grad
    # is True.
    params_to_update = model_ft.parameters()
    print("Params to learn:")
    if feature_extract:
        params_to_update = []
        for name, param in model_ft.named_parameters():
            if param.requires_grad:  # idiom fix: no `== True`
                params_to_update.append(param)
                print("\t", name)
    else:
        for name, param in model_ft.named_parameters():
            if param.requires_grad:
                print("\t", name)

    # Observe that all parameters are being optimized
    if cfgs['optim'] == 'sgd':
        optimizer_ft = optim.SGD(params_to_update, lr=cfgs['lr'],
                                 momentum=cfgs['momentum'])
    elif cfgs['optim'] == 'adam':
        optimizer_ft = optim.Adam(params=params_to_update, lr=cfgs['lr'],
                                  betas=(0.9, 0.999), eps=1e-08,
                                  weight_decay=0, amsgrad=False)

    # Setup the loss fxn
    criterion = nn.CrossEntropyLoss()

    #scheduler
    # scheduler = MultiStepLR(optimizer_ft,milestones = [10,40])

    # Train and evaluate
    model_ft, hist = train_model(model_ft, dataloaders_dict, criterion,
                                 optimizer_ft, start_epoch=start_epoch,
                                 savePath=cfgs['savePath'],
                                 num_epochs=cfgs['epoch'],
                                 is_inception=(model_name == "inception"),
                                 device=device)
    print(hist)
    # Save the epoch with the best history value as the final checkpoint.
    torch.save(
        {
            'epoch': hist.index(max(hist)) + 1,
            'model_state_dict': model_ft.state_dict(),
            'optimizer_state_dict': optimizer_ft.state_dict()
        }, '%s-best.pt' % cfgs['savePath'])
# NOTE(review): fragment — the opening of this parser.add_argument call
# (its flag name/type/default) lies outside this chunk.
metavar='N', help='number of layers')
parser.add_argument('--hidden_size', type=int, default=80, metavar='N',
                    help='number of hidden size')
parser.add_argument('--num_classes', type=int, default=3, metavar='N',
                    help='number of classes')
args = parser.parse_args()
# print args.cut
# print args.train_csv_path
# print args.train_wav_path

######################## cut raw signal
# Optional preprocessing stage: segment the raw recordings.
if args.prepare:
    cut(args.read_path, args.train_csv_path, args.train_wav_path,
        args.redimension_train_path, args.redimension_validation_path,
        args.padding_path, args.split_ratio)
# Optional feature-extraction stage: build image-like features (e.g. MFCC).
if args.extract:
    extract(args.read_path, args.redimension_train_path,
            args.redimension_validation_path, args.mfcc_length,
            args.sample_size, args.sample_rate, args.length, args.width,
            args.image_train_path, args.image_validation_path)
#~~~~~~~~~~~~~~~~~~~~~~~~~~~Loading data #
# types = ['mfcc','wavelet','rawSignal']
train_model(args.features_type, args.arc, args.hidden_size, args.num_layers,
            args.num_classes, args.drop_out, args.lr, args.batch_size,
            args.epochs, args.split_ratio, args.length, args.width,
            args.momentum, args.optimizer, args.image_train_path,
            args.image_validation_path)
TESTING_DATA, TESTING_LABELS = generate_disc_set(1000) ######### # Global training parameters ######### NB_EPOCHS = 300 LR = 0.05 MINIBATCH_SIZE = 100 MODEL = MODEL_BEST CRITERION = LossMSE(MODEL) # Computing print("##### Training model#####") _, MODEL_TRAINED = train_model(MODEL, TRAINING_DATA, TRAINING_LABELS, criterion=CRITERION, learning_rate=LR, nb_epochs=NB_EPOCHS, minibatch_size=MINIBATCH_SIZE) TRAINING_ERROR = compute_nb_errors(MODEL_TRAINED, TRAINING_DATA, TRAINING_LABELS) TESTING_ERROR = compute_nb_errors(MODEL_TRAINED, TESTING_DATA, TESTING_LABELS) print("***** Errors *****") print("Training: {:.4f}%".format(100*TRAINING_ERROR/len(TRAINING_DATA))) print("Testing: {:.4f}%".format(100*TESTING_ERROR/len(TESTING_DATA))) print("******************\n\n")
def run_experiment(experiment_config: Dict, save: bool, gpu_ind: int):
    """Run one texture-classification experiment from a config dict.

    experiment_config keys: 'dataset'/'dataset_args', 'model',
    'network'/'network_args', 'optimizer_args', and 'train_args'
    (merged over DEFAULT_TRAIN_ARGS; flags such as "LR_RAMP" or
    "CYCLIC_LR" select an LR schedule with SGD, otherwise Adam is used
    — optimizer_args must match the chosen optimizer).

    save: if True, save final weights to the model's canonical location.
    gpu_ind: integer specifying which gpu to use.
    """
    print(
        f'Running experiment with config {experiment_config} on GPU {gpu_ind}')

    # Resolve the dataset class by name and load/generate its data.
    ds_cls = getattr(importlib.import_module('texture.datasets'),
                     experiment_config['dataset'])
    ds_args = experiment_config.get('dataset_args', {})
    dataset = ds_cls(**ds_args)
    dataset.load_or_generate_data()
    print(dataset)

    # Resolve the model class and network constructor the same way.
    model_cls = getattr(importlib.import_module('texture.models'),
                        experiment_config['model'])
    net_fn = getattr(importlib.import_module('texture.networks'),
                     experiment_config['network'])
    net_args = experiment_config.get('network_args', {})
    opt_args = experiment_config.get('optimizer_args', {})
    model = model_cls(dataset_cls=ds_cls,
                      network_fn=net_fn,
                      dataset_args=ds_args,
                      network_args=net_args,
                      optimizer_args=opt_args)
    print(model)

    # Merge user train args over defaults; record run metadata in-place.
    experiment_config['train_args'] = {
        **DEFAULT_TRAIN_ARGS,
        **experiment_config.get('train_args', {})
    }
    experiment_config['experiment_group'] = experiment_config.get(
        'experiment_group', None)
    experiment_config['gpu_ind'] = gpu_ind

    train_args = experiment_config['train_args']
    print("Training with flags: ", train_args['flags'])
    train_model(model, dataset,
                epochs=train_args['epochs'],
                batch_size=train_args['batch_size'],
                flags=train_args['flags'],
                flag_args=train_args.get('flag_args', {}),
                gpu_ind=gpu_ind,
                save_ext=experiment_config.pop('save_ext', None))

    score = model.evaluate(dataset.X_test, dataset.y_test)
    print(f'Test evaluation: {score}')

    if save:
        model.save_weights()
'dataset': "p1", }, 'ffm2-nn-f3b': { 'bags': 2, 'epochs': 3, 'options': "--model ffm-nn --dropout-log 1", 'dataset': "f3", }, } parser = argparse.ArgumentParser(description='Train FFM2 model') parser.add_argument('profile', type=str, help='Train profile') parser.add_argument('--rewrite-cache', action='store_true', help='Drop cache files prior to train') parser.add_argument('--continue-train', type=str, help='Continue training of interrupted model') args = parser.parse_args() profile_name = args.profile profile = profiles[profile_name] if not os.path.exists('cache/full_train_bin_%s.index' % profile['dataset']) or args.rewrite_cache: print "Generating data..." os.system("bin/export-bin-data-%s" % profile['dataset']) train_model(fit_predict, profile_name, profile, name=args.continue_train)
# Hyperparameters pulled from the parameter dict `pa` (built earlier in the
# script — presumably from CLI args; confirm upstream).
hidden_size = pa['hidden_size']
epoch_number = pa['epoch_num']
gpu = pa['gpu']

# Ensure the experiment directory exists before writing into it.
if (not os.path.exists(f'experiments/{save_dir}')):
    os.makedirs(f'experiments/{save_dir}')
file_path = f'experiments/{save_dir}/checkpoint.pt'

# saving parameters
with open(f'experiments/{save_dir}/parameters.json', 'w') as f:
    json.dump(pa, f)

# load the data - data_load() from help.py
print('Loading data')
train_loader, validation_loader, test_loader = hp.load_data(data_path)
criterion = torch.nn.NLLLoss()

# build model
print(f'Loading weights from {architecture}')
model, optimizer = hp.get_model_and_optimizer(pa)

# train model (checkpoints written to file_path)
print('Training model')
hp.train_model(model, optimizer, learning_rate, train_loader,
               validation_loader, criterion, epoch_number, file_path, gpu)

# checkpoint the model
print("Model has been successfully trained")
def run_experiment(experiment_config: Dict, save_weights: bool, gpu_ind: int, use_wandb: bool=False):
    """Run one colorizer training experiment from a config dict.

    experiment_config keys: 'dataset'/'dataset_args', 'model',
    'network'/'network_args', and 'train_args' (merged over
    DEFAULT_TRAIN_ARGS; must supply 'epochs', 'batch_size',
    'learning_rate').

    save_weights: save final weights to the model's canonical location.
    gpu_ind: integer specifying which gpu to use.
    """
    print(f'Running experiment with config {experiment_config} on GPU {gpu_ind}')

    #### DATASET
    ds_class = getattr(importlib.import_module('colorizer.datasets'),
                       experiment_config['dataset'])
    ds_args = experiment_config.get('dataset_args', {})
    dataset = ds_class(**ds_args)
    dataset.load_data()
    print(dataset)

    ### MODEL
    mdl_class = getattr(importlib.import_module('colorizer.models'),
                        experiment_config['model'])
    net_fn = getattr(importlib.import_module('colorizer.networks'),
                     experiment_config['network'])
    net_args = experiment_config.get('network_args', {})
    model = mdl_class(dataset_cls=ds_class, network_fn=net_fn,
                      dataset_args=ds_args, network_args=net_args)
    # print(model)

    # Merge user train args over defaults; record run metadata in-place.
    experiment_config['train_args'] = {
        **DEFAULT_TRAIN_ARGS,
        **experiment_config.get('train_args', {})
    }
    experiment_config['experiment_group'] = experiment_config.get('experiment_group', None)
    experiment_config['gpu_ind'] = gpu_ind

    ###TRAIN
    train_args = experiment_config['train_args']
    util.train_model(
        model,
        dataset,
        epochs=train_args['epochs'],
        batch_size=train_args['batch_size'],
        learning_rate=train_args['learning_rate'],
        gpu_ind=gpu_ind,
        use_wandb=use_wandb
    )

    if save_weights:
        model.save_weights()
# NOTE(review): fragment — these first lines are the tail of a
# fit_predict-style function whose `def` (and train_X / split / split_name
# bindings) lie outside this chunk; indentation reconstructed flat.
train_y = pd.read_csv(split[0], usecols=['clicked'])['clicked'].values
if split_name == 'full':
    eval_X = None
    eval_y = None
else:
    eval_X = read_data(split_name + '_test')
    eval_y = pd.read_csv(split[1], usecols=['clicked'])['clicked'].values
# Small MLP over the click features; `nb_epoch` is old-Keras spelling.
model = nn_mlp_2((input_size,), layers=[20])
model.compile(optimizer='adadelta', loss='binary_crossentropy')
model.fit(
    x=train_X, y=train_y,
    batch_size=256, nb_epoch=1,
    validation_data=(None if eval_X is None else (eval_X, eval_y)),
    verbose=1, callbacks=[])
# Predict on the held-out split (or the test split for the 'full' run).
pred_X = read_data(split_name + '_test') if eval_X is None else eval_X
pred = model.predict(pred_X, batch_size=256)
pred_df = pd.read_csv(split[1])
pred_df['pred'] = pred
return pred_df

# Driver: run the fit/predict pipeline under the 'v1' profile.
profile_name = 'v1'
profile = {}
train_model(fit_predict, 'ctr-nn-%s' % profile_name, profile)
def run_experiment(experiment_config: Dict, save_weights: bool):
    """Run one font-classification experiment from a config dict.

    Merges 'train_args' over DEFAULT_TRAIN_ARGS, dynamically resolves the
    dataset/model/network by name, trains, optionally validates on the
    mismatch split ('val' mode only), scores the test split in 'test'
    mode, and optionally saves the final weights.
    """
    experiment_config["train_args"] = {
        **DEFAULT_TRAIN_ARGS,
        **experiment_config.get("train_args", {}),
    }
    experiment_config["experiment_group"] = experiment_config.get(
        "experiment_group", None)
    print(f"Running experiment with config {experiment_config}")

    mode = experiment_config["train_args"]["mode"]

    # Resolve the dataset class; default args toggle test mode from `mode`.
    ds_class = getattr(importlib.import_module("font_classifier.datasets"),
                       experiment_config["dataset"])
    ds_args = experiment_config.get("dataset_args",
                                    {'test_mode_on': mode == 'test'})
    dataset = ds_class(**ds_args)
    dataset.load_or_generate_data()
    print(dataset)

    # Resolve the model class and network constructor the same way.
    model_cls = getattr(importlib.import_module("font_classifier.models"),
                        experiment_config["model"])
    net_fn = getattr(importlib.import_module("font_classifier.networks"),
                     experiment_config["network"])
    net_args = experiment_config.get("network_args", {})
    model = model_cls(
        dataset_cls=ds_class,
        network_fn=net_fn,
        dataset_args=ds_args,
        network_args=net_args,
    )
    print(model)

    train_model(model, dataset,
                epochs=experiment_config["train_args"]["epochs"],
                batch_size=experiment_config["train_args"]["batch_size"])

    if experiment_config["train_args"]["validate_mismatch"] == "True":
        if mode == "val":
            try:
                mismatch_score = model.evaluate(dataset.mismatch_ds)
                print(f"Data mismatch score: {mismatch_score}")
            except AttributeError:
                print(
                    f"Dataset: {ds_class} doesn't support mismatch validation."
                )
        elif mode == "test":
            print(
                'In test mode, mismatch data isn\'t validated since it\'s used during training.'
            )

    if mode == "test":
        score = model.evaluate(dataset.test_ds)
        print(f"Test score: {score}")

    if save_weights:
        model.save_weights()
def get_res(config, train_dl, valid_dl, pretrained_embeddings, related_embeddings):
    """Train an LSTMClassifier on GPU and return its (top5, top1) scores."""
    classifier = LSTMClassifier(config, pretrained_embeddings, related_embeddings)
    classifier.cuda()
    scores = util.train_model(classifier, train_dl, valid_dl, config)
    # Drop the GPU-resident model before returning the metrics.
    del classifier
    top5, top1 = scores
    return top5, top1
args, _ = parser.parse_known_args() # get current index across all distributed jobs idx = None if getattr(args, 'distributed', False): idx = args.hosts.index(args.current_host) if args.controller == 'rnn': device = torch.device("cuda" if torch.cuda.is_available() else "cpu") controller = RNNController(1, 1, dim_h=args.rnn_dim_hidden, layers=args.rnn_num_layers).to(device) else: raise Exception('Controller not found') ss, Q, R = seiler_state_space() env = LQG(ss, Q, R, batch_size=args.batch_size) best = (np.inf, None, None) opt = torch.optim.Adam(controller.parameters(), lr=args.learning_rate) for i in range(args.num_inits): controller.init_weights() initial_state = copy.deepcopy(controller.state_dict()) losses = train_model(controller, env, opt, args.epochs, args.simulation_steps) if losses[-1] < best[0]: best = (losses[-1], copy.deepcopy(controller.state_dict()), initial_state) if i % 20 == 0: save_best_model(best, args.model_dir, args.controller, idx=idx)
# Corpus and training hyperparameters for the char-level RNN.
filename = 'shakespeare'
epochs = 20
batch_size = 64
seq_length = 100
rnn_units = 128  # 1024
embedding_dim = None  # 256

dataset, idx_to_char, char_to_idx, vocab = preprocess(filename, batch_size, seq_length)
vocab_size = len(vocab)
checkpoint_dir = './training_checkpoints/' + filename
checkpoint_prefix = checkpoint_dir + '/ckpt'

### Train model (comment out if only generating)
model = build_model(vocab_size, embedding_dim, rnn_units, batch_size)
# model.load_weights(tf.train.latest_checkpoint(checkpoint_dir))  # use this to continue training where it left off
history = train_model(model, dataset, epochs=epochs, checkpoint_prefix=checkpoint_prefix)

### Generate sample (comment out if only training)
#model = build_model(vocab_size, embedding_dim, rnn_units, batch_size=1)
#model.load_weights(tf.train.latest_checkpoint(checkpoint_dir))
#model.build(tf.TensorShape([1, None]))
#print(generate_text(model,char_to_idx,idx_to_char,
#                    start_string=u"Romeo:\n"))