def train_single(config: GNMTConfig):
    """
    Train model on single device.

    Args:
        config (GNMTConfig): Config for model.
    """
    print(" | Starting training on single device.")
    pre_train_dataset = load_dataset(data_files=config.pre_train_dataset,
                                     schema=config.dataset_schema,
                                     batch_size=config.batch_size,
                                     epoch_count=config.epochs,
                                     sink_mode=config.dataset_sink_mode,
                                     sink_step=config.dataset_sink_step
                                     ) if config.pre_train_dataset else None
    fine_tune_dataset = load_dataset(data_files=config.fine_tune_dataset,
                                     schema=config.dataset_schema,
                                     batch_size=config.batch_size,
                                     epoch_count=config.epochs,
                                     sink_mode=config.dataset_sink_mode,
                                     sink_step=config.dataset_sink_step
                                     ) if config.fine_tune_dataset else None
    test_dataset = load_dataset(data_files=config.test_dataset,
                                schema=config.dataset_schema,
                                batch_size=config.batch_size,
                                epoch_count=config.epochs,
                                sink_mode=config.dataset_sink_mode,
                                sink_step=config.dataset_sink_step
                                ) if config.test_dataset else None
    _build_training_pipeline(config=config,
                             pre_training_dataset=pre_train_dataset,
                             fine_tune_dataset=fine_tune_dataset,
                             test_dataset=test_dataset)
def train_single(config: TransformerConfig, platform: "Ascend"):
    """
    Train model on single device.

    Args:
        config (TransformerConfig): Config for model.
        platform (Ascend): Target platform to run training on.
    """
    print(" | Starting training on single device.")
    pre_train_dataset = load_dataset(data_files=config.pre_train_dataset,
                                     batch_size=config.batch_size,
                                     epoch_count=1,
                                     sink_mode=config.dataset_sink_mode,
                                     sink_step=config.dataset_sink_step
                                     ) if config.pre_train_dataset else None
    fine_tune_dataset = load_dataset(data_files=config.fine_tune_dataset,
                                     batch_size=config.batch_size,
                                     epoch_count=1,
                                     sink_mode=config.dataset_sink_mode,
                                     sink_step=config.dataset_sink_step
                                     ) if config.fine_tune_dataset else None
    test_dataset = load_dataset(data_files=config.test_dataset,
                                batch_size=config.batch_size,
                                epoch_count=1,
                                sink_mode=config.dataset_sink_mode,
                                sink_step=config.dataset_sink_step
                                ) if config.test_dataset else None
    _build_training_pipeline(config=config,
                             pre_training_dataset=pre_train_dataset,
                             fine_tune_dataset=fine_tune_dataset,
                             test_dataset=test_dataset,
                             platform=platform)
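# Hedged invocation sketch for the `train_single` variants above. `get_config`
# and the config path are hypothetical stand-ins for however the surrounding
# script actually builds its config object; only the Transformer variant takes
# the extra `platform` argument.
if __name__ == "__main__":
    config = get_config("config/config.json")
    train_single(config, platform="Ascend")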
def train(model: tf.estimator.Estimator,
          nb_epochs: int,
          train_data_path: str,
          val_data_path: str,
          batch_size: int = 32):
    train_epoch_history = [
        model.evaluate(
            input_fn=lambda: load_dataset(train_data_path, shuffle=False))
    ]
    validation_epoch_history = [
        model.evaluate(
            input_fn=lambda: load_dataset(val_data_path, shuffle=False))
    ]
    for epoch in range(nb_epochs):
        # Train on the path passed in rather than a hardcoded file.
        model.train(
            input_fn=lambda: load_dataset(train_data_path,
                                          epochs=1,
                                          shuffle=True,
                                          batch_size=batch_size))
        train_epoch_history.append(
            model.evaluate(
                input_fn=lambda: load_dataset(train_data_path, shuffle=False)))
        validation_epoch_history.append(
            model.evaluate(
                input_fn=lambda: load_dataset(val_data_path, shuffle=False)))
        logging.info(f"EPOCH: {epoch}:\n"
                     f"\tval_loss: {validation_epoch_history[-1]['loss']}\n"
                     f"\ttrain_loss: {train_epoch_history[-1]['loss']}\n")
    return train_epoch_history, validation_epoch_history
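# Minimal usage sketch for the estimator loop above; `my_model_fn`, the model
# directory, and both TFRecord paths are hypothetical placeholders, not names
# from this codebase.
estimator = tf.estimator.Estimator(model_fn=my_model_fn, model_dir="ckpts")
train_hist, val_hist = train(estimator,
                             nb_epochs=10,
                             train_data_path="data/train.tfrecords",
                             val_data_path="data/val.tfrecords",
                             batch_size=32)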
def train_parallel(config: GNMTConfig):
    """
    Train model on multiple Ascend chips.

    Args:
        config (GNMTConfig): Config for GNMT model.
    """
    _setup_parallel_env()
    print(f" | Starting training on {os.getenv('RANK_SIZE', None)} devices.")
    pre_train_dataset = load_dataset(
        data_files=config.pre_train_dataset,
        batch_size=config.batch_size,
        sink_mode=config.dataset_sink_mode,
        rank_size=MultiAscend.get_group_size(),
        rank_id=MultiAscend.get_rank()) if config.pre_train_dataset else None
    fine_tune_dataset = load_dataset(
        data_files=config.fine_tune_dataset,
        batch_size=config.batch_size,
        sink_mode=config.dataset_sink_mode,
        rank_size=MultiAscend.get_group_size(),
        rank_id=MultiAscend.get_rank()) if config.fine_tune_dataset else None
    test_dataset = load_dataset(
        data_files=config.test_dataset,
        batch_size=config.batch_size,
        sink_mode=config.dataset_sink_mode,
        rank_size=MultiAscend.get_group_size(),
        rank_id=MultiAscend.get_rank()) if config.test_dataset else None
    _build_training_pipeline(config=config,
                             pre_training_dataset=pre_train_dataset,
                             fine_tune_dataset=fine_tune_dataset,
                             test_dataset=test_dataset)
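# Hedged sketch of what a `_setup_parallel_env` helper typically does in
# MindSpore data-parallel scripts; this is NOT the repo's actual
# implementation, and the `gradients_mean` keyword is named `mirror_mean` in
# older MindSpore releases.
from mindspore import context
from mindspore.context import ParallelMode
from mindspore.communication import management as MultiAscend

def _setup_parallel_env_sketch():
    context.reset_auto_parallel_context()
    MultiAscend.init()
    context.set_auto_parallel_context(
        parallel_mode=ParallelMode.DATA_PARALLEL,
        device_num=MultiAscend.get_group_size(),
        gradients_mean=True)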
def infer(config): """ Transformer infer api. Args: config (TransformerConfig): Config. Returns: list, result with """ eval_dataset = load_dataset(data_files=config.test_dataset, batch_size=config.batch_size, epoch_count=1, sink_mode=config.dataset_sink_mode, shuffle=False) if config.test_dataset else None prediction = transformer_infer(config, eval_dataset) return prediction
def infer(config): """ GNMT infer api. Args: config (GNMTConfig): Config. Returns: list, result with """ eval_dataset = load_dataset(data_files=config.test_dataset, batch_size=config.batch_size, sink_mode=config.dataset_sink_mode, drop_remainder=False, is_translate=True, shuffle=False) if config.test_dataset else None prediction = gnmt_infer(config, eval_dataset) return prediction
def main(args):
    print(args)
    paths = download_data()
    for i in paths:
        parse_data(i)
    print("=============== Parsing dataset complete ===============")
    forecast_imputation()

    # Load configurations
    configs = load_config("config.yml")
    modelConfig = configs["model"]

    time = dt.now().strftime("%d-%m-%Y %H:%M:%S")
    logdir = "runs/" + time

    # Initialize SummaryWriter for tensorboard
    writer = Logger(logdir)
    write_configs(writer, modelConfig)

    # Preprocess the data
    train_loader, validation_loader, test_loader, data_mean, data_std, \
        forecast_mean, forecast_std = load_dataset(
            difference=0, batch_size=modelConfig["batchsize"])

    # Baseline model
    # baseline_model = Persistance(18, writer)

    # Initialize model
    model = NN_Model(input_dim=train_loader.dataset.tensors[0].size(1),
                     output_dim=1,
                     hidden_layers=modelConfig["hiddenlayers"],
                     writer=writer,
                     device=args.device)
    model.train(train_loader,
                validation_loader,
                epochs=modelConfig["epochs"],
                lr=modelConfig["lr"],
                step_size=modelConfig["step_size"],
                gamma=modelConfig["gamma"],
                weight_decay=modelConfig["weight_decay"])

    try:
        x = load_latest(10, 18, data_mean.item(), data_std.item(),
                        forecast_mean, forecast_std)
        ypred = model.predict(x)
        ypred = (ypred * data_std.item()) + data_mean.item()
        print("Model running successful!")
    except Exception as err:
        print("Error message: ", err)
        ypred = args.prev
        print("Model run failed... sending previous value")
    args.prev = ypred

    # b_rmse, b_ypred, b_ytest = baseline_model.test(test_loader)
    # rmse, ypred, ytest = model.test(test_loader)
    # print("RMSE: ", rmse)
    # print("BASELINE: ", b_rmse)
    # writer.add_text("RMSE", str(rmse.item()), 0)
    # writer.add_text("RMSE/Baseline", str(b_rmse.item()), 0)

    ####################
    #   Lagged Corr    #
    ####################
    # lagged_vals = get_lagged_correlation(ypred=ypred,
    #                                      ytrue=test_loader.dataset.tensors[1],
    #                                      num_delta=180)
    # writer.draw_lagged_correlation(lagged_vals)

    # y_test_unnormalized = (ytest * data_std) + data_mean
    # y_pred_unnormalized = (ypred * data_std) + data_mean
    # trade_env = Trader(y_test_unnormalized.tolist(), y_pred_unnormalized.tolist(), writer, 18)
    # trade_env.trade()
    # result = trade_env.pay_back()
    # print("total profit", result)

    writer.close()
    print("ypred: ", ypred)
    return ypred
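# Hypothetical entry point for `main` above; only `args.device` and
# `args.prev` are referenced inside the function, so a minimal parser needs
# just those two flags.
import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--device", default="cpu")
    parser.add_argument("--prev", type=float, default=0.0)
    main(parser.parse_args())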
args = parser.parse_args()

## fill up config dict
config = {
    'type': 'qualityNet',
    'random_state': args.random_state,
    'batch_size': args.batch_size,
    'nb_epochs': args.nb_epochs,
    'learning_rate': args.learning_rate,
    'data': args.data,
    'results': args.results
}

## net creation and data loading
net = QualityNet()
data, targets = load_dataset(args.data, verbose=args.verbose)

## data selection
split_config = [args.valid_ratio, args.test_ratio, args.random_state]
train, valid, test = create_split(data, targets, *split_config)

train_data, train_targets = train[0][:, np.newaxis, :, :], train[1]
if args.balanced:
    train_data, train_targets = balance_dataset(train_data, train_targets,
                                                args.verbose)
valid_data, valid_targets = valid[0][:, np.newaxis, :, :], valid[1]
if test is not None:
    test_data, test_targets = test[0][:, np.newaxis, :, :], test[1]

## inform the user
if args.verbose:
    print(' [-]: using {} training examples (~{}%)'.format(
import os
import random
from math import exp, log

import numpy as np
import torch
import torch.optim as optim
import wandb

from src.dataset import load_dataset
from model import Tacotron2

# Must be set before any CUDA work for the flag to take effect.
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

# `MelSpectrogram` and `MelSpectrogramConfig` are assumed to be imported from
# the project's featurizer module.
featurizer = MelSpectrogram(MelSpectrogramConfig())

device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

BATCH_SIZE = 8

# Seed everything for reproducibility.
torch.backends.cudnn.deterministic = True
random.seed(42)
np.random.seed(42)
torch.manual_seed(42)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(42)

dataloader_train, dataloader_val = load_dataset(featurizer, BATCH_SIZE)

generator = Tacotron2(n_mels=80, n_frames=1).to(device)

optimizer = optim.Adam(generator.parameters(),
                       lr=0.001,
                       betas=(0.9, 0.999),
                       eps=1e-6,
                       weight_decay=1e-6)
# Decay the learning rate exponentially from 1.0x down to 0.01x over the
# first 15000 steps, then hold it flat.
lambda1 = lambda step: exp(log(0.01) * min(15000, step) / 15000)
scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=[lambda1])

wandb.init(
def test_load_dataset():
    (x_train, y_train), (x_public_test, y_public_test), \
        (x_private_test, y_private_test) = dataset.load_dataset(FIXTURE_PATH)

    assert len(x_train) == len(x_public_test) == len(x_private_test) == 3
    assert len(y_train) == len(y_public_test) == len(y_private_test) == 3