def train(args):
    is_training = True
    session = tf.compat.v1.Session(config=config.TF_SESSION_CONFIG)
    dataset = AudioWrapper(args, 'train', is_training, session)
    wavs, labels = dataset.get_input_and_output_op()
    model = models.__dict__[args.arch](args)
    model.build(wavs=wavs, labels=labels, is_training=is_training)
    trainer = Trainer(model, session, args, dataset)
    trainer.train()
def main():
    N = 12
    COMM_RANGE = 2.5
    headless = False
    leader = True

    dir_name = "data"
    try:
        os.mkdir(dir_name)
    except FileExistsError:
        pass
    data_name = 'flocking_N=%d' % N
    data_path = os.path.join(dir_name, '%s.pt' % data_name)
    data = Trainer(data_path=data_path)

    z = Uniform(low=2.0 * torch.ones(N, 1), high=5.0 * torch.ones(N, 1))  # uniform between 2 and 5 in the z direction
    xy_normal = Normal(torch.zeros(N, 2), 1.25)  # gaussian in the xy direction
    dist = CombinedDistribution([xy_normal, z], mixer='cat', dim=1)

    model = Reynolds(N=N, D=6, K=1, OUT_DIM=3)
    env = gym.make('mrs-v0',
                   state_fn=state_fn,
                   update_fn=update_fn,
                   done_fn=done_fn,
                   N_AGENTS=N,
                   START_POS=dist,
                   K_HOPS=1,
                   COMM_RANGE=COMM_RANGE,
                   ACTION_TYPE='set_target_vel',
                   HEADLESS=headless)

    if leader:
        leader_action_policy = RandomAction()
        action_fn = leader_action_policy.action_fn
        environment = env.get_env()
        leader_agent = environment.agents[0]
        environment.set_colour(leader_agent, [1., 0., 0.])
    else:
        action_fn = lambda action, state: action

    data.save_trainer_onexit()
    data = generate_mrs(env=env,
                        model=model,
                        action_fn=action_fn,
                        trainer=data,
                        datapoints=1000,
                        episode_length=200)
scheduler = LR_Scheduler('poly', learning_rate, num_epochs, len(dataloader_train))
##################################
criterion1 = FocalLoss(gamma=3)
criterion2 = nn.CrossEntropyLoss()
criterion3 = lovasz_softmax
criterion = lambda x, y: criterion1(x, y)
# criterion = lambda x, y: 0.5 * criterion1(x, y) + 0.5 * criterion3(x, y)
mse = nn.MSELoss()

if not evaluation:
    writer = SummaryWriter(log_dir=os.path.join(log_path, task_name))
    f_log = open(os.path.join(log_path, task_name + ".log"), 'w')

trainer = Trainer(criterion, optimizer, n_class, size_g, size_p, sub_batch_size, mode, lamb_fmreg)
evaluator = Evaluator(n_class, size_g, size_p, sub_batch_size, mode, test)

best_pred = 0.0
print("start training......")
for epoch in range(num_epochs):
    trainer.set_train(model)
    optimizer.zero_grad()
    tbar = tqdm(dataloader_train)
    train_loss = 0
    for i_batch, sample_batched in enumerate(tbar):
        if evaluation:  # evaluation pattern: no training
            break
        scheduler(optimizer, i_batch, epoch, best_pred)
        loss = trainer.train(sample_batched, model, global_fixed)
        train_loss += loss.item()
        score_train, score_train_global, score_train_local = trainer.get_scores()
        if mode == 1:
            tbar.set_description('Train loss: %.3f; global mIoU: %.3f' %
                                 (train_loss / (i_batch + 1),
                                  np.mean(np.nan_to_num(score_train_global["iou"]))))
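# A minimal sketch (an assumption, since LR_Scheduler's implementation is not shown here) of the
# polynomial decay that the 'poly' mode conventionally applies: the learning rate shrinks from its
# base value towards zero over the course of training, typically with power 0.9. The helper name
# poly_lr and its parameters are illustrative, not part of this codebase.
def poly_lr(base_lr, epoch, i_batch, num_epochs, iters_per_epoch, power=0.9):
    # fraction of total training iterations completed so far
    progress = (epoch * iters_per_epoch + i_batch) / (num_epochs * iters_per_epoch)
    return base_lr * (1 - progress) ** power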
def main(seed=25):
    seed_everything(seed)
    device = torch.device('cuda:0')

    # arguments
    args = Args().parse()
    n_class = args.n_class
    img_path_train = args.img_path_train
    mask_path_train = args.mask_path_train
    img_path_val = args.img_path_val
    mask_path_val = args.mask_path_val
    model_path = os.path.join(args.model_path, args.task_name)  # save model
    log_path = args.log_path
    output_path = args.output_path

    if not os.path.exists(model_path):
        os.makedirs(model_path)
    if not os.path.exists(log_path):
        os.makedirs(log_path)
    if not os.path.exists(output_path):
        os.makedirs(output_path)

    task_name = args.task_name
    print(task_name)
    ###################################
    evaluation = args.evaluation
    test = evaluation and False
    print("evaluation:", evaluation, "test:", test)

    ###################################
    print("preparing datasets and dataloaders......")
    batch_size = args.batch_size
    num_workers = args.num_workers
    config = args.config
    data_time = AverageMeter("DataTime", ':3.3f')
    batch_time = AverageMeter("BatchTime", ':3.3f')

    dataset_train = DoiDataset(img_path_train, config, train=True, root_mask=mask_path_train)
    dataloader_train = DataLoader(dataset_train, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    dataset_val = DoiDataset(img_path_val, config, train=True, root_mask=mask_path_val)
    dataloader_val = DataLoader(dataset_val, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    ###################################
    print("creating models......")
    model = DoiNet(n_class, config['min_descriptor'] + 6, 4)
    model = create_model_load_weights(model, evaluation=False, ckpt_path=args.ckpt_path)
    model.to(device)

    ###################################
    num_epochs = args.epochs
    learning_rate = args.lr
    optimizer = get_optimizer(model, learning_rate=learning_rate)
    scheduler = LR_Scheduler(args.scheduler, learning_rate, num_epochs, len(dataloader_train))
    ##################################
    criterion_node = nn.CrossEntropyLoss()
    criterion_edge = nn.BCELoss()
    alpha = args.alpha

    writer = SummaryWriter(log_dir=log_path + task_name)
    f_log = open(log_path + task_name + ".log", 'w')
    #######################################
    trainer = Trainer(criterion_node, criterion_edge, optimizer, n_class, device, alpha=alpha)
    evaluator = Evaluator(n_class, device)

    best_pred = 0.0
    print("start training......")
    log = task_name + '\n'
    for k, v in args.__dict__.items():
        log += str(k) + ' = ' + str(v) + '\n'
    print(log)
    f_log.write(log)
    f_log.flush()

    for epoch in range(num_epochs):
        optimizer.zero_grad()
        tbar = tqdm(dataloader_train)
        train_loss = 0
        train_loss_edge = 0
        train_loss_node = 0
        start_time = time.time()
        for i_batch, sample in enumerate(tbar):
            data_time.update(time.time() - start_time)
            if evaluation:  # evaluation pattern: no training
                break
            scheduler(optimizer, i_batch, epoch, best_pred)
            loss, loss_node, loss_edge = trainer.train(sample, model)
            train_loss += loss.item()
            train_loss_node += loss_node.item()
            train_loss_edge += loss_edge.item()
            train_scores_node, train_scores_edge = trainer.get_scores()
            batch_time.update(time.time() - start_time)
            start_time = time.time()
            if i_batch % 2 == 0:
                tbar.set_description(
                    'Train loss: %.4f (loss_node=%.4f loss_edge=%.4f); F1 node: %.4f F1 edge: %.4f; data time: %.2f; batch time: %.2f' %
                    (train_loss / (i_batch + 1), train_loss_node / (i_batch + 1), train_loss_edge / (i_batch + 1),
                     train_scores_node["macro_f1"], train_scores_edge["macro_f1"],
                     data_time.avg, batch_time.avg))
        trainer.reset_metrics()
        data_time.reset()
        batch_time.reset()

        if epoch % 1 == 0:
            with torch.no_grad():
                model.eval()
print("evaluating...") tbar = tqdm(dataloader_val) start_time = time.time() for i_batch, sample in enumerate(tbar): data_time.update(time.time() - start_time) pred_node, pred_edge = evaluator.eval(sample, model) val_scores_node, val_scores_edge = evaluator.get_scores() batch_time.update(time.time() - start_time) tbar.set_description( 'F1 node: %.4f F1 edge: %.4f; data time: %.2f; batch time: %.2f' % (val_scores_node["macro_f1"], val_scores_edge["macro_f1"], data_time.avg, batch_time.avg)) start_time = time.time() data_time.reset() batch_time.reset() val_scores_node, val_scores_node = evaluator.get_scores() evaluator.reset_metrics() best_pred = save_model(model, model_path, val_scores_node, val_scores_edge, alpha, task_name, epoch, best_pred) write_log(f_log, train_scores_node, train_scores_edge, val_scores_node, val_scores_edge, epoch, num_epochs) write_summaryWriter(writer, train_loss / len(dataloader_train), optimizer, train_scores_node, train_scores_edge, val_scores_node, val_scores_edge, epoch) f_log.close()
num_epochs = args.epoch
learning_rate = args.lr
momentum = args.momentum
weight_decay = args.weight_decay
opt_args = dict(lr=learning_rate, momentum=momentum, weight_decay=weight_decay)
optimizer = get_optimizer(model, **opt_args)
scheduler = LR_Scheduler('poly', learning_rate, num_epochs, len(dataloader_train))
##################################
criterion = BCELoss()

if not evaluation:
    writer = SummaryWriter(log_dir=log_path + task_name)
    f_log = open(log_path + task_name + ".log", 'w')

trainer = Trainer(criterion, optimizer, n_class)
evaluator = Evaluator(n_class, test)

best_pred = 0.0
print("start training......")
for epoch in range(num_epochs):
    optimizer.zero_grad()
    tbar = tqdm(dataloader_train)
    train_loss = 0
    start_time = time.time()
    for i_batch, sample_batched in enumerate(tbar):
        print(i_batch)
        data_time.update(time.time() - start_time)
        if evaluation:  # evaluation pattern: no training
            break
def main():
    # Parameters
    N = 12
    D = 6
    K = 1
    COMM_RANGE = 2.5
    datapoints = 1000
    episode_length = 200
    headless = True
    leader = True

    # File Paths
    dir_path = "data"
    evaldata_path = os.path.join(dir_path, "%s_data.pt")
    try:
        os.mkdir(dir_path)
    except FileExistsError:
        pass

    # Initialise Models
    reynolds = Reynolds(N, D, 1, 3)
    models = {
        "reynolds": reynolds,
        "random": RandomController(OUT_DIM=3),
    }  # we will compare Reynolds flocking to a random model

    # Create Environment
    z = Uniform(low=2.0 * torch.ones(N, 1), high=5.0 * torch.ones(N, 1))
    xy_normal = Normal(torch.zeros(N, 2), 1.0)
    dist = CombinedDistribution([xy_normal, z], mixer='cat', dim=1)  # custom starting state distribution
    env = gym.make('mrs-v0',
                   state_fn=state_fn,
                   update_fn=update_fn,
                   N_AGENTS=N,
                   START_POS=dist,
                   K_HOPS=1,
                   COMM_RANGE=COMM_RANGE,
                   ACTION_TYPE='set_target_vel',
                   HEADLESS=headless)
    startpos = [env.generate_start_pos() for _ in range(int(datapoints / episode_length * 2))]
    env.START_POS = StartPosGenerator(startpos)  # use the same starting states for each model

    # Generate and Analyse Data
    analysers = {}
    for name, model in models.items():
        print(name)
        data = Trainer(K=K)
        is_data_loaded = data.load_trainer(path=evaldata_path % name)  # load simulation data if it exists
        if not is_data_loaded:  # generate data if it does not exist
            data.save_trainer_onexit(path=evaldata_path % name)
            simulate(env=env, model=model, trainer=data,
                     datapoints=datapoints, episode_length=episode_length, leader=leader)
        analysers[name] = MRSAnalytics(data)  # compute flocking metrics (separation, cohesion, leader dist)
        analysers[name].name = name

    # Draw Plots
    plot_separation(*analysers.values())
    plot_cohesion(*analysers.values())
    plot_leader_dist(*analysers.values())

    # Show
    show_plots()
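# A minimal sketch of how separation and cohesion metrics could be computed from agent positions.
# This is an assumption for illustration only: the actual MRSAnalytics implementation is not shown
# here, and the helper name flocking_metrics and the (T, N, 3) tensor layout are hypothetical.
import torch

def flocking_metrics(positions: torch.Tensor):
    # positions: (T, N, 3) agent positions over T timesteps
    dists = torch.cdist(positions, positions)              # (T, N, N) pairwise distances
    dists = dists + torch.eye(positions.shape[1]) * 1e9    # mask out self-distances
    separation = dists.min(dim=-1).values.mean()           # mean nearest-neighbour distance
    centroid = positions.mean(dim=1, keepdim=True)         # (T, 1, 3) swarm centre
    cohesion = (positions - centroid).norm(dim=-1).mean()  # mean distance to centroid
    return separation, cohesion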