def compare_models(f1, f2, depth):
    """Load two topic models and log how much they agree.

    Args:
        f1: Path/identifier of the first saved model (forwarded to load_model).
        f2: Path/identifier of the second saved model.
        depth: Comparison depth forwarded to get_model_agreement
               (presumably how many top topics/terms are compared -- TODO confirm).
    """
    # Load the models passed as arguments.
    model1 = load_model(f1)
    model2 = load_model(f2)

    # Measure their agreement and display the result. Lazy %-style args defer
    # string formatting until the record is actually emitted.
    agreement = get_model_agreement(model1, model2, depth)
    logging.info("Model Agreement: %s", agreement)
def start_evaluating(run_config, train_config, data_config, model_config):
    """Evaluate a checkpointed model on the test split and pickle the outputs.

    Args:
        run_config: dict with 'cuda_device', 'resume_path', 'log_root_path',
            'log_dir'.
        train_config: dict with at least 'batch_size'.
        data_config: forwarded to load_data.
        model_config: forwarded to load_model.

    Raises:
        ValueError: if run_config['resume_path'] is not set.
    """
    # Hack to prevent the data loader from going on GPU 0: restrict CUDA
    # visibility to the requested device, which then appears as device 0.
    import os
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = str(run_config['cuda_device'])
    torch.cuda.set_device(0)

    logger = Logger(run_config)

    # Load the data; fall back to the validation split when no test split exists.
    train_data, val_data, test_data = load_data(data_config,
                                                train_config['batch_size'])
    if test_data is None:
        test_data = val_data

    # Load the model and restore the checkpoint. An explicit raise (rather
    # than assert) survives `python -O`.
    print('Loading model...')
    model = load_model(model_config)
    if run_config['resume_path'] is None:
        raise ValueError('Run path must be set for evaluation.')
    print('Loading checkpoint ' + run_config['resume_path'])
    # NOTE(review): epoch 500 is hard-coded here -- confirm this is the
    # intended checkpoint rather than logger.load_best(model).
    model = logger.load_epoch(model, 500)

    print('Putting the model on the GPU...')
    model.cuda()
    model.eval()

    output = eval_model(test_data, model, train_config)

    # Persist the evaluation outputs next to the run's logs.
    path = os.path.join(run_config['log_root_path'], run_config['log_dir'])
    with open(path, 'wb') as f:
        pickle.dump(output, f)
def main():
    """Entry point: parse options, load the dataset, and train the model."""
    options = Options().parse()
    dataset = load_data(options)
    net = load_model(options, dataset)
    net.train()
def main():
    """Entry point: parse options, load data, and run training."""
    parsed = Options().parse()
    # The returned object bundles the splits: access the training set via
    # data.train_data and the evaluation set via data.valid_data.
    data = load_data(parsed)
    model = load_model(parsed, data)
    model.train()
def vis(statepaths, dataloader, experiment_id, cfg):
    """Plot per-lead input-gradient saliency maps for each fold's checkpoint.

    For every fold checkpoint in `statepaths`, runs every sample of
    `dataloader` (batch size must be 1), backpropagates the top-class score to
    the input, and saves an annotated ECG plot with the saliency overlay under
    ./output/vis/<experiment_id>/.

    Args:
        statepaths: checkpoint paths, one per training fold (n_folds - 1 of
            them, since one fold is held out as the test set).
        dataloader: yields (x, y_true, filename) with batch size 1.
        experiment_id: subdirectory name for the saved figures.
        cfg: config dict; reads cfg['training']['device'],
            cfg['data']['n_folds'], cfg['data']['beat_types'].
    """
    device = cfg['training']['device']
    n_folds = cfg['data']['n_folds']
    assert len(statepaths) == (
        n_folds - 1
    ), f"If using a hold out test set, we should have n_folds-1 statepaths"
    model = load_model(cfg, load_model_only=True)
    model = model.to(device)
    for i_fold, state in enumerate(tqdm(statepaths)):
        # Load the checkpoint weights for this fold.
        # NOTE(review): `model.module` assumes the model is wrapped (e.g.
        # DataParallel); the sibling test() handles the unwrapped case too --
        # confirm this path is only used with data_parallel enabled.
        state = torch.load(state)
        model.module.load_state_dict(state['model'])
        model.eval()

        # Run every test sample through the model with input gradients enabled.
        for i_batch, (x, y_true, filename) in enumerate(dataloader):
            assert len(x) == 1, "batch size for testing should be 1"
            x = x.to(device, non_blocking=True)
            # Specify we want gradient back to original image, not just first
            # conv layer.
            x.requires_grad_()
            y_pred = model(x)
            # If we don't detach we get a backprop error on pytorch 1.6.
            cls_pred = y_pred.argmax().detach()
            # Activation of highest class, element 0 (BS 1).
            score_pred = y_pred[0, cls_pred]
            score_pred.backward()
            # Absolute input gradient = saliency; max over leads collapses the
            # lead dimension.
            saliency_eachlead = x.grad.data.abs()[0]
            saliency_allleads, _ = torch.max(x.grad.data.abs()[0], dim=0)

            # Standardise saliency maps between 0 and 1.
            # NOTE(review): this divides by max() rather than (max()-min()),
            # so the upper bound is only reached when min() == 0 -- confirm
            # whether true min-max scaling was intended.
            saliency_eachlead = (
                (saliency_eachlead - saliency_eachlead.min()) /
                saliency_eachlead.max()).detach().cpu().numpy()
            saliency_allleads = (
                (saliency_allleads - saliency_allleads.min()) /
                saliency_allleads.max()).detach().cpu().numpy()

            # Plot: title encodes case id, ground-truth and predicted class;
            # the filename marks whether the prediction was correct.
            filename = filename[0]
            classid_true, classid_pred = int(y_true[0]), int(cls_pred)
            classname_true, classname_pred = cfg['data']['beat_types'][
                classid_true], cfg['data']['beat_types'][classid_pred]
            correct = 'CORRECT' if classname_true == classname_pred else 'INCORRECT'
            title = f"Case {os.path.basename(os.path.dirname(filename))}\n" \
                    f"Ground truth:{classname_true}\n" \
                    f"Predicted:{classname_pred}"
            savepath = f"./output/vis/{experiment_id}/{os.path.basename(filename).split('.')[0]}_{correct}.png"
            plot_ecg(x[0].detach().cpu().numpy(),
                     title=title,
                     saliency=saliency_eachlead,
                     savepath=savepath)
def main():
    """Entry point: enable autograd anomaly detection, init W&B, and train."""
    torch.autograd.set_detect_anomaly(True)
    wandb.init(entity="wenxun", project="tutorial")
    parsed_opts = Options().parse()
    dataset = load_data(parsed_opts)
    model = load_model(parsed_opts, dataset)
    model.train()
def update_prediction(db):
    """Run the trained phase-3 models over every stock and store predictions.

    For each stock code in the `stocks` table, applies one pretrained DQN per
    prediction horizon (7/14/30 days) to the latest price data and writes the
    resulting action values via the module-level `stmt4` statement.

    Args:
        db: an open DB-API connection providing cursor().
    """
    # Load the trained models, one network per prediction horizon.
    predict_days = [7, 14, 30]
    nets = []
    # A sample is needed for the environment's observation shape, so the
    # KQ003380 ticker is loaded arbitrarily.
    sample_prices_list, _valid_list = lib.data.load_prices(["KQ003380"])
    for pdays in predict_days:
        file_path = "data/v3.0-phase3-{}.data".format(pdays)
        env = pdenviron.PredEnv(prices_list=sample_prices_list,
                                predict_days=pdays)
        net = models.SimpleFFDQN(env.observation_space.shape[0],
                                 env.action_space.n)
        models.load_model(file_path, net)
        nets.append(net)

    today = datetime.datetime.now().date()
    with closing(db.cursor()) as cur:
        cur.execute("select scode from stocks")
        for row in cur.fetchall():
            scode = row[0]
            prices_list, val_prices_list = lib.data.load_prices([scode])
            # Skip stocks with too little price history to evaluate.
            if len(prices_list[0].open) < 60:
                continue
            try:
                with closing(db.cursor()) as cur2:
                    # Pair each horizon with its trained network.
                    for pdays, net in zip(predict_days, nets):
                        env = pdenviron.PredEnv(prices_list=prices_list,
                                                predict_days=pdays)
                        # Use the last available date as the offset.
                        obs = env.reset(0, len(prices_list[0].open) - 1)
                        values = environ.apply_model_from_state(obs, net)
                        # Store the prediction result.
                        cur2.execute(
                            stmt4,
                            (scode, today, pdays, values[0], values[1],
                             values[2], values[3], values[4]))
            except Exception as ex:
                # Best-effort: log and continue with the next stock.
                logger.error('update_prediction() failed:' + str(ex))
def test(statepaths, dataloader, cfg):
    """Ensemble the per-fold checkpoints over the held-out test set.

    Each fold's checkpoint scores every test sample; the per-fold class
    activations are averaged, argmax'd, and compared against the targets.

    Args:
        statepaths: checkpoint paths, one per training fold (n_folds - 1,
            since one fold is the held-out test set).
        dataloader: yields (x, y_true, filename) with batch size 1.
        cfg: config dict; reads cfg['data']['beat_types']['test'],
            cfg['data']['n_folds'], cfg['training']['device'],
            cfg['training']['data_parallel'].

    Returns:
        (targets, predicted_classes, filenames, kappa, accuracy, cm).
    """
    # Number of distinct classes in the test label mapping; model outputs are
    # truncated to this many columns below (presumably the model may emit
    # extra training-only classes -- TODO confirm).
    n_classes_test = len(set(cfg['data']['beat_types']['test'].values()))
    device = cfg['training']['device']
    n_test = len(dataloader)
    n_folds = cfg['data']['n_folds']
    assert len(statepaths) == (
        n_folds - 1
    ), f"If using a hold out test set, we should have n_folds-1 statepaths"
    # predictions[sample, class, fold] accumulates every fold's activations.
    predictions = np.zeros((n_test, n_classes_test, n_folds - 1))
    targets = np.zeros(n_test)
    filenames = []
    model = load_model(cfg, load_model_only=True)
    model = model.to(device)
    for i_fold, state in enumerate(tqdm(statepaths)):
        # Load model for fold; unwrap .module only when data-parallel wrapped.
        state = torch.load(state)
        if cfg['training']['data_parallel']:
            model.module.load_state_dict(state['model'])
        else:
            model.load_state_dict(state['model'])
        model.eval()

        # Score every test sample with this fold's weights.
        for i_batch, (x, y_true, filename) in enumerate(dataloader):
            assert len(x) == 1, "batch size for testing should be 1"
            x = x.to(device, non_blocking=True)
            y_true = y_true.to(device, non_blocking=True)
            with torch.no_grad():
                y_pred = model(x)
                y_pred = y_pred[:, :n_classes_test]
            predictions[i_batch, :, i_fold] = y_pred[0].detach().float().cpu()
            # Targets/filenames are identical across folds; record them once.
            if i_fold == 0:
                targets[i_batch] = y_true[0].detach().cpu()
                filenames.append(filename[0])

    # Average fold activations, then pick the highest-scoring class.
    ensembled_preds = np.mean(predictions, axis=-1)
    predicted_classes = np.argmax(ensembled_preds, axis=-1)
    kappa = cohen_kappa_score(targets, predicted_classes)
    accuracy = accuracy_score(targets, predicted_classes)
    cm = confusion_matrix(targets, predicted_classes)
    print(f"Cohen's Kappa: {kappa}")
    print(f"Accuracy: {accuracy}")
    print(f"Confusion matrix:\n{cm}")
    return targets, predicted_classes, filenames, kappa, accuracy, cm
def start_training(run_config, train_config, data_config, model_config):
    """Run the (open-ended) training loop with logging, plotting, checkpoints.

    Args:
        run_config: dict with 'cuda_device' and 'resume_path'.
        train_config: dict with at least 'batch_size'.
        data_config: forwarded to load_data.
        model_config: forwarded to load_model.

    NOTE(review): the `while True` loop has no visible exit condition, so the
    process presumably runs until externally stopped -- confirm.
    """
    # hack to prevent the data loader from going on GPU 0: only the requested
    # device is made visible, so it appears as device 0.
    import os
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = str(run_config['cuda_device'])
    # torch.cuda.set_device(run_config['cuda_device'])
    torch.cuda.set_device(0)

    # initialize logging and plotting
    logger = Logger(run_config)
    plotter = Plotter(logger.log_dir, run_config, train_config, model_config,
                      data_config)

    # load the data; fall back to the test split when validation is missing.
    train_data, val_data, test_data = load_data(data_config,
                                                train_config['batch_size'])
    if val_data is None:
        val_data = test_data

    # load the model, optimizers
    print('Loading model...')
    model = load_model(model_config)
    print('Loading optimizers...')
    optimizers, schedulers = load_opt_sched(train_config, model)
    if run_config['resume_path']:
        print('Resuming checkpoint ' + run_config['resume_path'])
        model, optimizers, schedulers = logger.load_checkpoint(
            model, optimizers)
    print('Putting the model on the GPU...')
    model.cuda()

    while True:
        # training epoch
        out = train(train_data, model, optimizers, train_config, data_config)
        logger.log(out, 'Train')
        plotter.plot(out, 'Train')

        if val_data:
            # validation epoch
            out = validate(val_data, model, train_config, data_config)
            logger.log(out, 'Val')
            plotter.plot(out, 'Val')

        # periodic checkpointing (Logger decides which epochs to save)
        if logger.save_epoch():
            logger.save_checkpoint(model, optimizers)

        logger.step()
        plotter.step()
        schedulers[0].step()
        schedulers[1].step()
        # Persist plots each epoch; placed inside the loop since the loop
        # never terminates (after the loop it would be unreachable).
        plotter.save()
def main():
    """Entry point: parse options, seed RNGs, then run inference/test/train.

    Mode selection:
      * opt.phase == "inference" -> model.inference() with batch size forced to 1
      * opt.path_to_weights set  -> model.test()
      * otherwise                -> timed model.train()
    """
    opt = Options().parse()
    opt.print_freq = opt.batchsize
    seed(opt.manualseed)
    # BUG FIX: torch.seed() RE-seeds the RNG with a fresh non-deterministic
    # value, clobbering the manual seed set on the line above.
    # torch.initial_seed() merely reports the current seed.
    print("Seed:", str(torch.initial_seed()))
    if opt.phase == "inference":
        opt.batchsize = 1
    data = load_data(opt)
    model = load_model(opt, data)
    if opt.phase == "inference":
        model.inference()
    else:
        if opt.path_to_weights:
            model.test()
        else:
            train_start = time.time()
            model.train()
            train_time = time.time() - train_start
            print(f'Train time: {train_time} secs')
ds_test = ECGDataset(cfg, 'test', fold) dl_train = DataLoader(ds_train, cfg['training']['batch_size'], shuffle=True, num_workers=cfg['training']['n_workers'], pin_memory=True, collate_fn=collate_longest_in_batch) dl_test = DataLoader(ds_test, cfg['training']['batch_size'], shuffle=False, num_workers=cfg['training']['n_workers'], pin_memory=True, collate_fn=collate_longest_in_batch) # Model model, starting_epoch, state = load_model(cfg) optimizer, scheduler = load_optimizer(model, cfg, state, steps_per_epoch=(len(dl_train))) train_criterion, test_criterion = load_criterion(cfg) # Train writer = get_summary_writer(cfg, log_dir) best_loss, best_path, last_save_path = 1e10, None, None n_epochs = cfg['training']['n_epochs'] for epoch in range(starting_epoch, n_epochs + 1): print(f"\nEpoch {epoch} of {n_epochs}") # Cycle
# torch.cuda.set_device(run_config['cuda_device']) torch.cuda.set_device(0) # initialize logging logger = Logger(run_config) # load the data train_data, val_data, test_data = load_data(data_config, train_config['batch_size']) if val_data is None: val_data = test_data data = val_data # load the model, optimizers print('Loading model...') model = load_model(model_config) assert run_config['resume_path'] is not None, 'Model must be resumed from checkpoint.' if run_config['resume_path']: print('Resuming checkpoint ' + run_config['resume_path']) model = logger.load_best(model) print('Putting the model on the GPU...') model.cuda() out = visualize(data, model, train_config, data_config) ################################################################################ ## plot data, predictions, and reconstructions
def view_model(fname, num_terms):
    """Print every topic of a saved topic model.

    Args:
        fname: path to the saved model (forwarded to load_model).
        num_terms: number of words to display per topic.
    """
    model = load_model(fname)
    # Lazy %-style args defer formatting until the record is actually emitted.
    logging.info("%s Topics", model.num_topics)
    model.print_topics(model.num_topics, num_words=num_terms)
def main():
    """Train the a4c3d model, optionally with distributed data parallelism.

    Reads the module-level CONFIG, builds datasets/loaders (with distributed
    samplers when cfg['training']['data_parallel'] == 'distributed'), then
    runs the epoch loop with checkpointing and W&B logging on the main
    process only.
    """
    cfg = load_config(CONFIG)

    # distributed settings
    parser = argparse.ArgumentParser()
    parser.add_argument('--local_rank', type=int, default=0)
    parser.add_argument('--ngpu', type=int, default=4)
    args = parser.parse_args()
    if cfg['training']['data_parallel'] == 'distributed':
        distributed = True
        local_rank = args.local_rank
        torch.cuda.set_device(local_rank)
        world_size = args.ngpu
        torch.distributed.init_process_group(
            'nccl',
            init_method="tcp://localhost:16534",
            world_size=world_size,
            rank=local_rank)
    else:
        distributed = False
        local_rank = None
        world_size = None

    # BUG FIX: the original mixed `if not local_rank:` with
    # `if local_rank == 0:`. The latter is False when local_rank is None, so
    # single-process runs never printed progress, saved checkpoints, or ran
    # visualization. `is_main` is True for rank 0 AND for non-distributed runs.
    is_main = not local_rank

    # settings
    bs_train, bs_test, n_workers = cfg['training']['batch_size_train'], cfg['training']['batch_size_test'], cfg['training']['n_workers']
    n_epochs = cfg['training']['n_epochs']
    transforms_train, transforms_test = load_transforms(cfg)

    # data (shuffling is delegated to the sampler when distributed)
    ds_train = E32Dataset(cfg, cfg['paths']['data_train'], 'train', transforms=transforms_train)
    ds_test = E32Dataset(cfg, cfg['paths']['data_test'], 'test', transforms=transforms_test)
    sampler_train = DistributedSampler(ds_train, num_replicas=world_size, rank=local_rank) if distributed else None
    sampler_test = DistributedSampler(ds_test, num_replicas=world_size, rank=local_rank) if distributed else None
    dl_train = DataLoader(ds_train, bs_train, shuffle=False if distributed else True, num_workers=n_workers, pin_memory=False, sampler=sampler_train)
    dl_test = DataLoader(ds_test, bs_test, shuffle=False, num_workers=n_workers, pin_memory=False, sampler=sampler_test)

    # model
    model, starting_epoch, state = load_model(cfg, local_rank)
    optimizer, scheduler = load_optimizer(model, cfg, state, steps_per_epoch=(len(dl_train)))
    train_criterion, test_criterion = load_criterion(cfg)

    # WandB (main process only)
    if is_main:
        wandb.init(project="a4c3d", config=cfg, notes=cfg.get("description", None))
        wandb.save("*.mp4")  # Write MP4 files immediately to WandB
        wandb.watch(model)

    # training
    best_loss, best_path, last_save_path = 1e10, None, None
    for epoch in range(starting_epoch, n_epochs + 1):
        if is_main:
            print(f"\nEpoch {epoch} of {n_epochs}")

        # Cycle
        train_loss = cycle('train', model, dl_train, epoch, train_criterion, optimizer, cfg, scheduler, local_rank=local_rank)
        test_loss = cycle('test', model, dl_test, epoch, test_criterion, optimizer, cfg, scheduler, local_rank=local_rank)

        # Save state if required (main process only)
        if is_main:
            model_weights = model.module.state_dict() if cfg['training']['data_parallel'] else model.state_dict()
            state = {'epoch': epoch + 1, 'model': model_weights, 'optimizer': optimizer.state_dict(), 'scheduler': scheduler}
            save_name = f"{epoch}_{test_loss:.05f}.pt"
            best_loss, last_save_path = save_state(state, save_name, test_loss, best_loss, cfg, last_save_path, lowest_best=True)

            # Vis seg
            vis_mse(ds_test, model, epoch, cfg)

    # Force-save the final epoch's state regardless of loss.
    if is_main:
        save_name = f"FINAL_{epoch}_{test_loss:.05f}.pt"
        save_state(state, save_name, test_loss, best_loss, cfg, last_save_path, force=True)
def train_model(cuda, phase, premodel, pdays):
    """Train a DQN for the given curriculum phase.

    cuda : True / False -- run on GPU or CPU.
    phase : 1~3 -- selects the config (sconfig/mconfig/pconfig) and environment.
    premodel: path to phase-1 weights, used to build the phase-2 environments.
    pdays: integer prediction horizon (used in phase 3 only).

    Raises:
        ValueError: if phase is not 1, 2 or 3.
    """
    device = torch.device("cuda" if cuda else "cpu")
    phase = int(phase)
    if phase == 1:
        config = sconfig
    elif phase == 2:
        config = mconfig
    elif phase == 3:
        config = pconfig
    else:
        # BUG FIX: `config` (and later `save_name`/the envs) were unbound for
        # an unexpected phase; fail fast instead of a confusing NameError.
        raise ValueError("phase must be 1, 2 or 3, got {}".format(phase))

    run_name = "v" + config.version + "-phase" + str(phase)
    saves_path = os.path.join("saves", run_name)
    os.makedirs(saves_path, exist_ok=True)
    save_name = ""
    # BUG FIX: predict_days was only bound in phase 3 but is referenced in the
    # save metadata below for every phase.
    predict_days = None

    writer = SummaryWriter(comment=run_name)
    prices_list, val_prices_list = data.load_prices(config.choices)

    if phase == 1:
        s_env = environ.StocksEnvS(prices_list)
        stock_env = s_env
        val_stock_env = environ.StocksEnvS(val_prices_list)
        save_name = "{}.data".format(run_name)
    elif phase == 2:
        # Load the phase-1 network graph.
        s_env = environ.StocksEnvS(prices_list)
        prenet = models.SimpleFFDQN(s_env.observation_space.shape[0],
                                    s_env.action_space.n)  # .to(device)
        models.load_model(premodel, prenet)
        # Build the phase-2 environments on top of the pretrained net.
        stock_env = environ.StocksEnvM(prices_list, prenet)
        val_stock_env = environ.StocksEnvM(val_prices_list, prenet)
        save_name = "{}.data".format(run_name)
    elif phase == 3:
        predict_days = int(pdays)
        # BUG FIX: the horizon was hard-coded to 7 despite `pdays` being
        # parsed above, and the validation env was built from the TRAINING
        # prices instead of the validation prices.
        stock_env = pdenviron.PredEnv(prices_list=prices_list,
                                      predict_days=predict_days)
        val_stock_env = pdenviron.PredEnv(prices_list=val_prices_list,
                                          predict_days=predict_days)
        save_name = "{}-{}.data".format(run_name, predict_days)

    net = models.SimpleFFDQN(stock_env.observation_space.shape[0],
                             stock_env.action_space.n).to(device)
    tgt_net = ptan.agent.TargetNet(net)
    selector = ptan.actions.EpsilonGreedyActionSelector(config.epsilon_start)
    agent = ptan.agent.DQNAgent(net, selector, device=device)
    exp_source = ptan.experience.ExperienceSourceFirstLast(
        stock_env, agent, config.gamma, steps_count=config.reward_steps)
    buffer = ptan.experience.ExperienceReplayBuffer(exp_source,
                                                    config.replay_size)
    optimizer = optim.Adam(net.parameters(), lr=config.learning_rate)

    # main training loop
    step_idx = 0
    eval_states = None
    best_mean_val = None

    with common.RewardTracker(writer, np.inf,
                              group_rewards=100) as reward_tracker:
        while step_idx < config.end_step:
            step_idx += 1
            buffer.populate(1)
            # Linearly annealed epsilon-greedy exploration.
            selector.epsilon = max(
                config.epsilon_stop,
                config.epsilon_start - step_idx / config.epsilon_steps)

            new_rewards = exp_source.pop_rewards_steps()
            if new_rewards:
                reward_tracker.reward(new_rewards[0], step_idx,
                                      selector.epsilon)

            # Wait for a minimum amount of experience before training.
            if len(buffer) < config.replay_initial:
                continue

            # Sample a fixed batch of states once, used to track mean Q-value.
            if eval_states is None:
                print("Initial buffer populated, start training")
                eval_states = buffer.sample(config.states_to_evaluate)
                eval_states = [
                    np.array(transition.state, copy=False)
                    for transition in eval_states
                ]
                eval_states = np.array(eval_states, copy=False)

            if step_idx % config.eval_every_step == 0:
                mean_val = common.calc_values_of_states(eval_states, net,
                                                        device=device)
                writer.add_scalar("values_mean", mean_val, step_idx)
                if best_mean_val is None or best_mean_val < mean_val:
                    if best_mean_val is not None:
                        print("%d: Best mean value updated %.3f -> %.3f" %
                              (step_idx, best_mean_val, mean_val))
                    best_mean_val = mean_val
                    # torch.save(net.state_dict(), os.path.join(saves_path, "mean_val-%.3f.data" % mean_val))

            # One DQN optimization step against the target network.
            optimizer.zero_grad()
            batch = buffer.sample(config.batch_size)
            loss_v = common.calc_loss(batch, net, tgt_net.target_model,
                                      config.gamma**config.reward_steps,
                                      device=device)
            loss_v.backward()
            optimizer.step()

            if step_idx % config.target_net_sync == 0:
                tgt_net.sync()

            if step_idx % config.checkpoint_every_step == 0:
                idx = step_idx // config.checkpoint_every_step
                torch.save(
                    net.state_dict(),
                    os.path.join(saves_path, "checkpoint-%d.data" % idx))

            if step_idx % config.validation_every_step == 0:
                res = validation.validation_run(stock_env, net, device=device)
                for key, val in res.items():
                    writer.add_scalar(key + "_test", val, step_idx)
                res = validation.validation_run(val_stock_env, net,
                                                device=device)
                for key, val in res.items():
                    writer.add_scalar(key + "_val", val, step_idx)

    # Persist the final network together with its horizon metadata.
    models.save_model(os.path.join(saves_path, save_name), net,
                      {"predict_days": predict_days})