def evaluate(self):
    acc, losses = util.get_meters(self.n_batches_eval, epoch=None)
    print_mod = int(self.n_batches_eval / args.epoch_reports)
    count = total = 0
    self.net.eval()
    with torch.no_grad():
        for batch, (inputs, labels) in enumerate(tqdm(self.testloader), 1):
            if args.use_cuda:
                inputs, labels = inputs.cuda(), labels.cuda()
            logits = self.net(inputs)
            loss = self.loss_fn(logits, labels)
            losses.update(loss.item(), inputs.size(0))
            _, pred = logits.max(1)
            correct = pred.eq(labels).sum().item()
            batch_acc = 100. * correct / labels.size(0)
            count += correct
            total += labels.size(0)
            acc.update(batch_acc, labels.size(0))
            util.stats.test_loss.append(loss.item())
            util.stats.test_acc.append(batch_acc)
            if batch % print_mod == print_mod - 1:
                log_str = '\nBatch: [{}/{}]\tLoss: {:.4f}\tAccuracy: {:.2f} % ({:.2f} %)'.format(
                    batch, self.n_batches_eval, loss.item(), batch_acc, acc.avg)
                misc.log(self.log_path, log_str)
    print('Acc {acc.avg:.3f} | Loss {losses.avg:.2e}'.format(
        acc=acc, losses=losses))
    return 100. * count / total
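# `util.get_meters` is defined elsewhere in the repo; from the usage above
# (`.update(value, n)` plus an `.avg` attribute) it appears to return two
# meters following the common AverageMeter pattern. A minimal, hypothetical
# sketch of that interface, not the repo's actual implementation:
class AverageMeter:
    """Tracks a running, size-weighted average of a scalar metric."""

    def __init__(self):
        self.sum = 0.0
        self.count = 0
        self.avg = 0.0

    def update(self, value, n=1):
        # `value` is a per-batch average; `n` weights it by batch size.
        self.sum += value * n
        self.count += n
        self.avg = self.sum / self.count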
def run_train(self):
    final_epoch = False
    for epoch in range(1, args.n_epochs + 1):
        misc.log(
            self.log_path,
            'Elapsed Time: {}/{}\n'.format(
                self.timer.measure(),
                self.timer.measure(epoch / float(args.n_epochs))))
        if self.scheduler:
            lr = self.scheduler.get_lr()[0]
        else:
            lr = args.lr
        self.train(epoch)
        acc = self.evaluate()
        improvement = acc > self.best_acc
        self.best_acc = max(acc, self.best_acc)
        misc.log(
            self.log_path,
            'Best Accuracy: {} | Current Learning Rate: {}'.format(
                np.round(self.best_acc, 5), np.round(lr, 5)))
        if epoch == args.n_epochs:
            final_epoch = True
        if args.save:
            misc.save_model(args=args,
                            model_name=self.model_name,
                            best_acc=self.best_acc,
                            stats=util.stats,
                            state={
                                'epoch': epoch,
                                'state_dict': self.net.state_dict(),
                                'best_acc': self.best_acc,
                                'optimizer': self.optimizer.state_dict()
                            },
                            improvement=improvement,
                            epoch=epoch,
                            final_epoch=final_epoch)
def __init__(self):
    if args.save:
        self.model_name = misc.name_model(args)
        with open('{name}/parameters.txt'.format(name=self.model_name), 'w+') as f:
            f.write(str(args))
        self.log_path = '{name}/log.txt'.format(name=self.model_name)
    else:
        self.log_path = './log.txt'
    misc.log(self.log_path, str(vars(args)))
    trainloader, self.testloader = datasets.__dict__[args.dataset](
        args, train=True, test=True)
    if not args.reload:
        self.trainloader = trainloader
    self.n_batches = len(trainloader)
    self.n_batches_eval = len(self.testloader)
    self.net = util.build_neuralnet()
    util.network_summary(net=self.net,
                         input_shape=(3, args.resolution, args.resolution),
                         batch_size=args.batch_size,
                         device='cuda' if args.use_cuda else 'cpu')
    self.loss_fn = util.cost()
    self.optimizer, self.scheduler = util.build_optimizer(
        net=self.net, n_batches=self.n_batches)
    self.best_acc = -np.inf
    self.timer = Meters.Timer()
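# `util.build_optimizer` is not shown here. It receives `n_batches`, and
# `train()` below steps the returned scheduler once per batch, which points
# at a per-batch schedule such as OneCycleLR. A hypothetical sketch of that
# contract; the actual optimizer and scheduler choices live in `util` and
# may differ:
import torch

def build_optimizer_sketch(net, n_batches, lr=0.1, n_epochs=100):
    optimizer = torch.optim.SGD(net.parameters(), lr=lr,
                                momentum=0.9, weight_decay=5e-4)
    # total_steps = batches per epoch * epochs, so a schedule stepped once
    # per batch finishes exactly at the end of training.
    scheduler = torch.optim.lr_scheduler.OneCycleLR(
        optimizer, max_lr=lr, total_steps=n_batches * n_epochs)
    return optimizer, scheduler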
def evaluate(self, test, num_eval_episodes=None):
    """ Evaluate the current policy on a test environment.

    Parameters
    ----------
    test : tf_agents.environments.TFPyEnvironment
        Test environment
    num_eval_episodes : int, optional
        Number of evaluation episodes; defaults to `self.num_eval_episodes`

    Returns
    -------
    tuple
        The average return, duplicated to satisfy the common
        `(predictions, metric)` evaluation interface
    """
    if num_eval_episodes is None:
        num_eval_episodes = self.num_eval_episodes
    eval_avg_return = compute_avg_return(test, self.policy, num_eval_episodes)
    log("Eval Avg Reward:", eval_avg_return)
    return (eval_avg_return, eval_avg_return)
def _train(self):
    """ Private training method """
    # Wrap `train` in a tf.function for graph-mode speed
    self.agent.train = common.function(self.agent.train)

    # Reset the train step counter
    self.agent.train_step_counter.assign(0)

    # Evaluate the agent's policy once before training
    avg_return = compute_avg_return(self.eval_env, self.agent.policy,
                                    self.num_eval_episodes)
    self.eval_rewards.append(avg_return)

    # Train loop
    for _ in range(int(self.train_iters)):
        # Collect a few steps using collect_policy and save them to the replay buffer
        for _s in range(self.collect_steps_per_iter):
            collect_step(self.train_env, self.agent.collect_policy,
                         self.replay_buffer)
        # Sample a batch of data from the buffer and update the agent's network
        experience, unused_info = next(self.iterator)
        train_loss = self.agent.train(experience).loss
        step = self.agent.train_step_counter.numpy()
        if step % self.log_interval == 0:
            log('step = {0}: loss = {1}'.format(step, train_loss))
        if step % self.eval_interval == 0:
            avg_return = compute_avg_return(self.eval_env, self.agent.policy,
                                            self.num_eval_episodes)
            log('step = {0}: Average Return = {1}'.format(step, avg_return))
            self.eval_rewards.append(avg_return)
    log("Best episode avg reward:", max(self.eval_rewards))
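# `compute_avg_return` and `collect_step` are imported from elsewhere in the
# project. Their call signatures match the helpers from the standard
# TF-Agents DQN tutorial; a sketch under that assumption (the project's own
# versions may differ in detail):
from tf_agents.trajectories import trajectory

def compute_avg_return(environment, policy, num_episodes=10):
    # Roll out `num_episodes` full episodes with `policy` and average
    # the undiscounted episode returns.
    total_return = 0.0
    for _ in range(num_episodes):
        time_step = environment.reset()
        episode_return = 0.0
        while not time_step.is_last():
            action_step = policy.action(time_step)
            time_step = environment.step(action_step.action)
            episode_return += time_step.reward
        total_return += episode_return
    avg_return = total_return / num_episodes
    return avg_return.numpy()[0]

def collect_step(environment, policy, buffer):
    # Take a single environment step with `policy` and push the resulting
    # transition into the replay buffer.
    time_step = environment.current_time_step()
    action_step = policy.action(time_step)
    next_time_step = environment.step(action_step.action)
    traj = trajectory.from_transition(time_step, action_step, next_time_step)
    buffer.add_batch(traj)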
def train(self, epoch):
    if args.reload:
        trainloader = datasets.__dict__[args.dataset](args, train=True,
                                                      test=False)
    else:
        trainloader = self.trainloader
    acc, losses = util.get_meters(self.n_batches, epoch=epoch)
    print_mod = int(self.n_batches / args.epoch_reports)
    self.net.train()
    for batch, (inputs, labels) in enumerate(
            tqdm(trainloader, total=self.n_batches)):
        # Debug helper: dump a grid image of one training batch, then exit.
        # from imageio import imwrite
        # import torchvision
        # imwrite(uri='./data/train_instance8x8.png',
        #         im=np.transpose(torchvision.utils.make_grid(
        #             inputs, nrow=16, padding=0).numpy(), (1, 2, 0)))
        # quit()
        if args.use_cuda:
            inputs, labels = inputs.cuda(), labels.cuda()
        logits = self.net(inputs)
        loss = self.loss_fn(logits, labels)
        _, pred = logits.max(1)
        correct = pred.eq(labels).sum().item()
        batch_acc = 100. * correct / labels.size(0)
        acc.update(batch_acc, 1)
        losses.update(loss.item(), inputs.size(0))
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        if self.scheduler:
            self.scheduler.step()
        util.stats.train_acc.append(batch_acc)
        util.stats.train_loss.append(loss.item())
        if batch % print_mod == print_mod - 1 or batch == self.n_batches - 1:
            log_str = '\nEpoch: [{}/{}]\tBatch: [{}/{}]\tLoss: {:.4f}\tAcc: {:.2f} % ({:.2f} %)'.format(
                epoch, args.n_epochs, batch, self.n_batches, loss.item(),
                batch_acc, acc.avg)
            misc.log(self.log_path, log_str)
def parallel_run_crossvalidation(sc: SparkContext, training, testing,
                                 optim: dict, cfg: dict):
    """ Parallel MapReduce implementation of the cross-validation process.
    Jobs are split into batches sized to the available workers; each worker
    runs the full training and evaluation of one hyperconfig.

    Parameters
    ----------
    sc : SparkContext
        App context
    training : pyspark.rdd.RDD | tf_agents.environments.TFPyEnvironment
        Training data or data config
    testing : pyspark.rdd.RDD | tf_agents.environments.TFPyEnvironment
        Eval/Testing data or data config
    optim : dict
        Optimization config
    cfg : dict
        Base config
    """
    if optim['num_workers'] < 2:
        raise Exception("MapReduce optimization needs at least 2 workers!")
    hcfgs = {}
    metric_series = []
    for itrs in range(math.ceil(optim['max_iters'] / optim['num_workers'])):
        log(f"Running CV-{itrs} batch ({itrs * optim['num_workers']}"
            f" - {(itrs + 1) * optim['num_workers']})")
        # Generate num_workers hyperconfigs for this batch
        mpr_hcfgs = sc.parallelize([
            (_j, sample_random_hyperconfig(optim['grid'], cfg))
            for _j in range(itrs * optim['num_workers'],
                            (itrs + 1) * optim['num_workers'])
        ])
        hcfgs.update(mpr_hcfgs.map(train_eval_mapper).collectAsMap())
        metric_series = [(_k, _h['metric']) for _k, _h in hcfgs.items()]
        # Convergence validation
        if itrs > 1:
            if has_converged(metric_series[-2][1], metric_series[-1][1],
                             optim['convergence']):
                log(f"Optimization has converged in {itrs} batch iterations")
                break
    # Best model selection based on the configured metric
    best_model = hcfgs[sorted(
        metric_series,
        key=lambda s: s[1],
        reverse=(optim['metric']['criteria'] == 'max'))[0][0]]
    log("Best performed model:\n", pformat(best_model))
    cv_results_path = (Path(cfg['mdl_file']).parent /
                       f'parallel_cv_2-{uuid.uuid4()}.json').as_posix()
    with open(cv_results_path, 'w') as f:
        f.write(json.dumps(hcfgs))
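# `train_eval_mapper` is defined elsewhere. From `.map(...).collectAsMap()`
# above and the later reads of `_h['metric']`, it must take an
# `(index, hyperconfig)` pair and return the same pair with the evaluation
# metric attached. A hypothetical sketch mirroring the serial loop in
# `serial_run_crossvalidation`; note the real mapper runs on a worker and
# cannot close over the driver's SparkContext, so environments and models
# are built from the hyperconfig alone:
def train_eval_mapper(pair):
    idx, hcfg = pair
    # Hypothetical: resolve train/eval environments from the sampled config
    training = read_env(None, hcfg['environment'])
    testing = read_env(None, hcfg['environment'])
    model = models[hcfg['class']](None, hcfg)
    model.train(training, testing)
    _preds, metric = model.evaluate(testing)
    hcfg['metric'] = float(metric)
    return idx, hcfg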
def serial_run_crossvalidation(sc: SparkContext, training, testing,
                               optim: dict, cfg: dict):
    """ Serial implementation of the cross-validation process.

    Parameters
    ----------
    sc : SparkContext
        App context
    training : pyspark.rdd.RDD | tf_agents.environments.TFPyEnvironment
        Training data or data config
    testing : pyspark.rdd.RDD | tf_agents.environments.TFPyEnvironment
        Eval/Testing data or data config
    optim : dict
        Optimization config
    cfg : dict
        Base config
    """
    hcfgs = {}
    metric_series = []
    for itrs in range(int(optim['max_iters'])):
        log(f"Running CV-{itrs}")
        _hcfg = sample_random_hyperconfig(optim['grid'], cfg)
        hcfgs[itrs] = _hcfg
        # Instantiate and train the model
        model = models[_hcfg['class']](sc, _hcfg)
        model.train(training, testing)
        model.save()
        # Run evaluation in the testing env
        _preds, metric = model.evaluate(testing)
        hcfgs[itrs]['metric'] = float(metric)
        # Convergence validation
        if itrs > 1:
            if has_converged(metric, metric_series[-1][1],
                             optim['convergence']):
                log(f"Optimization has converged in {itrs} iterations")
                break
        metric_series.append((itrs, metric))
    # Best model selection based on the configured metric
    best_model = hcfgs[sorted(
        metric_series,
        key=lambda s: s[1],
        reverse=(optim['metric']['criteria'] == 'max'))[0][0]]
    log("Best performed model:\n", pformat(best_model))
    cv_results_path = (Path(cfg['mdl_file']).parent /
                       f'single_cv-{uuid.uuid4()}.json').as_posix()
    with open(cv_results_path, 'w') as f:
        f.write(json.dumps(hcfgs))
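# `has_converged` is imported from elsewhere. Given that both callers compare
# the two most recent metrics against `optim['convergence']`, a plausible
# (hypothetical) sketch is a simple change-below-tolerance test:
def has_converged(current, previous, tolerance):
    """True when the metric moved less than `tolerance` between iterations."""
    return abs(float(current) - float(previous)) < float(tolerance)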
def log(self, info, logfile='trainlog.txt'):
    log(info, logfilename=logfile, savepath=self.args.savepath)
    Returns
    -------
    sc : pyspark.SparkContext
    """
    conf = SparkConf()\
        .setAppName(APP_NAME)\
        .setMaster("local[4]")\
        .set("spark.executor.memory", "4g")\
        .set("spark.executor.cores", "4")\
        .set("spark.driver.cores", "2")\
        .set("spark.driver.memory", "2g")
    sc = SparkContext(conf=conf)
    return sc


if __name__ == '__main__':
    log(f"Starting {APP_NAME} evaluation ...")
    args = parse_predit_args()
    # Load config
    cfg = load_conf()
    log(f"Using {cfg['class']}")
    # Create spark context
    sc = create_spark()
    st_time = time.time()
    # Load testing data
    testing = read_env(sc, args['test_file'])
    # Init model
    model = models[cfg['class']](sc, cfg)
    # Load model and evaluate
    model.load_model()
    model.evaluate(testing)
    log(f"Finished predicting in {time.time() - st_time}")
    Returns
    -------
    sc : pyspark.SparkContext
    """
    conf = SparkConf()\
        .setAppName(APP_NAME)\
        .setMaster("local[4]")\
        .set("spark.executor.memory", "4g")\
        .set("spark.executor.cores", "4")\
        .set("spark.driver.cores", "2")\
        .set("spark.driver.memory", "2g")
    sc = SparkContext(conf=conf)
    return sc


if __name__ == '__main__':
    log(f"Starting {APP_NAME} training ...")
    st_time = time.time()
    # Load config
    cfg = load_conf()
    log(f"Using {cfg['class']}")
    # Create spark context
    sc = create_spark()
    # Load environment configuration (same config for training and evaluation)
    training = read_env(sc, cfg['environment'])
    evaluation = read_env(sc, cfg['environment'])
    # Init model
    model = models[cfg['class']](sc, cfg)
    # Start training
    model.train(training, evaluation)
    model.save()
    log(f"Finished training in {time.time() - st_time}")
import sys

from utils.files import data_filename as fn
from utils.misc import log
from schedule.parser import text2schedule as parse_schedule
from driver.extract import get_courses_and_classes_data, \
    get_students, get_applications, get_parameters
from driver.index import create_classes_and_courses_indexes, \
    save_classes_and_courses, save_criteria, \
    create_criteria_indexes, create_parameters_index, \
    save_parameters, create_students_index, save_students

assert len(sys.argv) == 3, 'execute.py <max_search> <default_parameter>'
max_search = int(sys.argv[1])
default_parameter = int(sys.argv[2])

# COURSES AND CLASSES
log('processing courses and classes...')
classes_of_course, classes, criteria_list = get_courses_and_classes_data()
course_codes_list = list(classes_of_course.keys())
save_classes_and_courses(course_codes_list, classes_of_course)
save_criteria(criteria_list)
criteria_expr2index, index2criteria_expr = create_criteria_indexes(
    criteria_list)

# Write one line per course: index and number of classes
with open(fn('disciplinas'), 'w') as f:
    for i, code in enumerate(course_codes_list):
        n = len(classes_of_course[code])
        f.write(f'{i}:{n}\n')

with open(fn('turmas'), 'w') as f:
    for i, code in enumerate(course_codes_list):
        for j, class_code in enumerate(classes_of_course[code]):
    Returns
    -------
    sc : pyspark.SparkContext
    """
    conf = SparkConf()\
        .setAppName(APP_NAME + "-" + optim_name)\
        .setMaster(f"local[{exec_workers}]")\
        .set("spark.executor.memory", "4g")\
        .set("spark.executor.cores", f"{exec_workers}")\
        .set("spark.driver.cores", "1")\
        .set("spark.driver.memory", "2g")
    sc = SparkContext(conf=conf)
    return sc


if __name__ == '__main__':
    log(f"Starting {APP_NAME} optimization ...")
    # Read arguments
    args = parse_args()
    with open(args.optim_config, 'r') as f:
        optconfig = json.load(f)
    # Load config
    cfg = load_conf(optconfig['config'])
    log(f"Using {cfg['class']}")
    # Create spark context
    sc = create_spark(optconfig['optim_name'], optconfig['num_workers'])
    st_time = time.time()
    # Load environment configuration
    training = read_env(sc, cfg['environment'])
    testing = read_env(sc, cfg['environment'], max_episode_steps=1000)
    # Run CV
    if args.parallelized: