def evaluate(self):
    """Run one full pass over the test set and return overall accuracy.

    Returns
    -------
    float
        Accuracy over the whole test set, in percent (100 * correct / total).

    Side effects: appends per-batch loss/accuracy to ``util.stats`` and
    writes periodic progress lines to ``self.log_path`` via ``misc.log``.
    """
    acc, losses = util.get_meters(self.n_batches_eval, epoch=None)
    # Guard with max(1, ...): when the eval loader has fewer batches than
    # args.epoch_reports, int(...) is 0 and `batch % print_mod` below would
    # raise ZeroDivisionError.
    print_mod = max(1, int(self.n_batches_eval / args.epoch_reports))
    count = total = 0
    self.net.eval()
    with torch.no_grad():
        for batch, (inputs, labels) in enumerate(tqdm(self.testloader), 1):
            if args.use_cuda:
                inputs, labels = inputs.cuda(), labels.cuda()
            logits = self.net(inputs)
            loss = self.loss_fn(logits, labels)
            losses.update(loss.item(), inputs.size(0))
            # Top-1 prediction per sample.
            _, pred = logits.max(1)
            correct = pred.eq(labels).sum().item()
            batch_acc = 100. * correct / labels.size(0)
            count += correct
            total += labels.size(0)
            acc.update(batch_acc, labels.size(0))
            util.stats.test_loss.append(loss.item())
            util.stats.test_acc.append(batch_acc)
            if batch % print_mod == print_mod - 1:
                log_str = '\nBatch: [{}/{}]\tLoss: {:.4f}\tAccuracy: {:.2f} % ({:.2f} %)'.format(
                    batch, self.n_batches_eval, loss.item(), batch_acc,
                    acc.avg)
                misc.log(self.log_path, log_str)
        print('Acc {acc.avg:.3f} | Loss {losses.avg:.2e}'.format(
            acc=acc, losses=losses))
    # Exact accuracy from raw counts, independent of the running meter.
    return 100. * count / total
 def run_train(self):
     """Top-level training loop.

     For each epoch: log elapsed/projected time, run one training epoch,
     evaluate on the test set, track the best accuracy seen so far, and
     (when ``args.save`` is set) checkpoint the model/optimizer state.
     """
     for epoch in range(1, args.n_epochs + 1):
         elapsed = self.timer.measure()
         projected = self.timer.measure(epoch / float(args.n_epochs))
         misc.log(
             self.log_path,
             'Elapsed Time: {}/{}\n'.format(elapsed, projected))
         # Scheduler-driven LR when one exists, otherwise the static CLI LR.
         lr = self.scheduler.get_lr()[0] if self.scheduler else args.lr
         self.train(epoch)
         acc = self.evaluate()
         improvement = acc > self.best_acc
         if improvement:
             self.best_acc = acc
         misc.log(
             self.log_path,
             'Best Accuracy: {} | Current Learning Rate: {}'.format(
                 np.round(self.best_acc, 5), np.round(lr, 5)))
         if args.save:
             checkpoint = {
                 'epoch': epoch,
                 'state_dict': self.net.state_dict(),
                 'best_acc': self.best_acc,
                 'optimizer': self.optimizer.state_dict()
             }
             misc.save_model(args=args,
                             model_name=self.model_name,
                             best_acc=self.best_acc,
                             stats=util.stats,
                             state=checkpoint,
                             improvement=improvement,
                             epoch=epoch,
                             final_epoch=(epoch == args.n_epochs))
 def __init__(self):
     """Build the experiment: logging paths, data loaders, network,
     loss, optimizer/scheduler, and bookkeeping state (driven by the
     module-level ``args``)."""
     if args.save:
         # Named run directory; dump the full arg set for reproducibility.
         self.model_name = misc.name_model(args)
         with open('{name}/parameters.txt'.format(name=self.model_name),
                   'w+') as f:
             f.write(str(args))
         self.log_path = '{name}/log.txt'.format(name=self.model_name)
     else:
         self.log_path = './log.txt'
     misc.log(self.log_path, str(vars(args)))
     trainloader, self.testloader = datasets.__dict__[args.dataset](
         args, train=True, test=True)
     # With args.reload, train() rebuilds the loader every epoch, so the
     # initial trainloader is only kept when reload is off.
     if not args.reload:
         self.trainloader = trainloader
     self.n_batches = len(trainloader)
     self.n_batches_eval = len(self.testloader)
     self.net = util.build_neuralnet()
     # NOTE(review): capitalized `Util` here vs lowercase `util` elsewhere —
     # presumably a distinct import; verify it is defined at module level.
     Util.network_summary(net=self.net,
                          input_shape=(3, args.resolution, args.resolution),
                          batch_size=args.batch_size,
                          device='cuda' if args.use_cuda else 'cpu')
     self.loss_fn = util.cost()
     self.optimizer, self.scheduler = util.build_optimizer(
         net=self.net, n_batches=self.n_batches)
     # -inf so the first evaluation always counts as an improvement.
     self.best_acc = -np.inf
     self.timer = Meters.Timer()
Ejemplo n.º 4
0
    def evaluate(self, test, num_eval_episodes=None):
        """Average the current policy's return over evaluation episodes.

        Params:
        ----
        test: tf_agents.environments.TFPyEnvironment
            Test environments
        num_eval_episodes: int, optional
            Episode count; defaults to ``self.num_eval_episodes``.

        Returns the average return twice, as a 2-tuple.
        """
        episodes = (self.num_eval_episodes
                    if num_eval_episodes is None else num_eval_episodes)
        eval_avg_return = compute_avg_return(test, self.policy, episodes)
        log("Eval Avg Reward:", eval_avg_return)
        return eval_avg_return, eval_avg_return
Ejemplo n.º 5
0
 def _train(self):
     """ Private training method.

     Runs the tf-agents DQN-style loop: collect experience into the
     replay buffer, sample batches, update the agent, and periodically
     log loss / evaluate the policy. Appends evaluation returns to
     ``self.eval_rewards``.
     """
     # Wrap train() in a tf.function for graph-mode speed.
     self.agent.train = common.function(self.agent.train)
     # Reset the train step
     self.agent.train_step_counter.assign(0)
     # Evaluate the agent's policy once before training (baseline return).
     avg_return = compute_avg_return(self.eval_env, self.agent.policy,
                                     self.num_eval_episodes)
     self.eval_rewards.append(avg_return)
     # train loop
     for _ in range(int(self.train_iters)):
         # Collect a few steps using collect_policy and save to the replay buffer.
         for _s in range(self.collect_steps_per_iter):
             collect_step(self.train_env, self.agent.collect_policy,
                          self.replay_buffer)
         # Sample a batch of data from the buffer and update the agent's network.
         experience, unused_info = next(self.iterator)
         train_loss = self.agent.train(experience).loss
         step = self.agent.train_step_counter.numpy()
         if step % self.log_interval == 0:
             log('step = {0}: loss = {1}'.format(step, train_loss))
         if step % self.eval_interval == 0:
             # Periodic evaluation with the greedy (non-collect) policy.
             avg_return = compute_avg_return(self.eval_env,
                                             self.agent.policy,
                                             self.num_eval_episodes)
             log('step = {0}: Average Return = {1}'.format(
                 step, avg_return))
             self.eval_rewards.append(avg_return)
     log("Best episode avg reward:", max(self.eval_rewards))
 def train(self, epoch):
     """Train the network for one epoch.

     Parameters
     ----------
     epoch : int
         1-based epoch index, used only for logging.

     Side effects: steps optimizer (and scheduler, if any), appends
     per-batch stats to ``util.stats``, logs progress via ``misc.log``.
     """
     if args.reload:
         # Re-instantiate the loader each epoch when reload is requested.
         trainloader = datasets.__dict__[args.dataset](args,
                                                       train=True,
                                                       test=False)
     else:
         trainloader = self.trainloader
     acc, losses = util.get_meters(self.n_batches, epoch=epoch)
     # max(1, ...) avoids modulo-by-zero when n_batches < args.epoch_reports.
     print_mod = max(1, int(self.n_batches / args.epoch_reports))
     self.net.train()
     # enumerate replaces the original manual `batch = -1; batch += 1`
     # counter; a stale commented-out image-dump debug block was removed.
     for batch, (inputs, labels) in enumerate(
             tqdm(trainloader, total=self.n_batches)):
         if args.use_cuda:
             inputs, labels = inputs.cuda(), labels.cuda()
         logits = self.net(inputs)
         loss = self.loss_fn(logits, labels)
         _, pred = logits.max(1)
         correct = pred.eq(labels).sum().item()
         batch_acc = 100. * correct / labels.size(0)
         # Weight by batch size so the running average is consistent with
         # evaluate() (the original weighted every batch equally).
         acc.update(batch_acc, labels.size(0))
         losses.update(loss.item(), inputs.size(0))
         self.optimizer.zero_grad()
         loss.backward()
         self.optimizer.step()
         if self.scheduler:
             # Per-batch scheduler stepping (e.g. warmup/cyclic schedules).
             self.scheduler.step()
         util.stats.train_acc.append(batch_acc)
         util.stats.train_loss.append(loss.item())
         if batch % print_mod == print_mod - 1 or batch == self.n_batches - 1:
             log_str = '\nEpoch: [{}/{}]\tBatch: [{}/{}]\tLoss: {:.4f}\tAcc: {:.2f} % ({:.2f} %)'.format(
                 epoch, args.n_epochs, batch, self.n_batches, loss.item(),
                 batch_acc, acc.avg)
             misc.log(self.log_path, log_str)
Ejemplo n.º 7
0
def parallel_run_crossvalidation(sc: SparkContext, training, testing,
                                 optim: dict, cfg: dict):
    """ Parallel MapReduce implementation of crossvalidation process.

    Hyperparameter configs are sampled and dispatched in batches of
    ``optim['num_workers']``; each worker trains and evaluates one config
    (``train_eval_mapper``). Stops early once the metric series converges,
    then logs the best config and dumps all results to a JSON file.

    Parameters
    ----------
    sc : SparkContext
        App context
    training : pyspark.rdd.RDD | tf_agents.environments.TFPyEnvironment
        Training data or data config
    testing : pyspark.rdd.RDD | tf_agents.environments.TFPyEnvironment
        Eval/Testing data or data config
    optim: dict
        Optimization config
    cfg : dict
        Base config
    """
    if optim['num_workers'] < 2:
        raise Exception("MapReduce optimization needs at least 2 workers!")
    hcfgs = {}
    metric_series = []
    num_batches = math.ceil(optim['max_iters'] / optim['num_workers'])
    for itrs in range(num_batches):
        log(f"Running CV-{itrs} batch ({itrs * optim['num_workers']} - {(itrs+1) * optim['num_workers']})"
            )
        # Sample one fresh hyperconfig per worker slot in this batch.
        start = itrs * optim['num_workers']
        stop = (itrs + 1) * optim['num_workers']
        jobs = [(_j, sample_random_hyperconfig(optim['grid'], cfg))
                for _j in range(start, stop)]
        # Fan out: each job trains + evaluates on a worker.
        batch_results = sc.parallelize(jobs).map(train_eval_mapper)
        hcfgs.update(batch_results.collectAsMap())
        metric_series = [(key, conf['metric']) for key, conf in hcfgs.items()]
        # Convergence check on the two most recent metrics (skipped for the
        # first two batches).
        if itrs > 1 and has_converged(metric_series[-2][1],
                                      metric_series[-1][1],
                                      optim['convergence']):
            log(f"Optimization has converged in {itrs} batch iterations")
            break
    # Rank by metric; 'max' criteria means the largest metric wins.
    ranking = sorted(metric_series,
                     key=lambda s: s[1],
                     reverse=(optim['metric']['criteria'] == 'max'))
    best_model = hcfgs[ranking[0][0]]
    log("Best performed model:\n", pformat(best_model))
    cv_results_path = (Path(cfg['mdl_file']).parent /
                       f'parallel_cv_2-{uuid.uuid4()}.json').as_posix()
    with open(cv_results_path, 'w') as f:
        f.write(json.dumps(hcfgs))
Ejemplo n.º 8
0
def serial_run_crossvalidation(sc: SparkContext, training, testing,
                               optim: dict, cfg: dict):
    """ Serial implementation of crossvalidation process.

    Samples one random hyperconfig per iteration, trains/saves/evaluates
    the model, and stops early when the metric series converges. Logs the
    best-performing config and dumps all results to a JSON file.

    Parameters
    ----------
    sc : SparkContext
        App context
    training : pyspark.rdd.RDD | tf_agents.environments.TFPyEnvironment
        Training data or data config
    testing : pyspark.rdd.RDD | tf_agents.environments.TFPyEnvironment
        Eval/Testing data or data config
    optim: dict
        Optimization config
    cfg : dict
        Base config
    """
    hcfgs = {}
    metric_series = []
    for itrs in range(int(optim['max_iters'])):
        log(f"Running CV-{itrs}")
        hconf = sample_random_hyperconfig(optim['grid'], cfg)
        hcfgs[itrs] = hconf
        # Instance, train, and persist this iteration's model.
        model = models[hconf['class']](sc, hconf)
        model.train(training, testing)
        model.save()
        # Evaluate on the testing env; keep the metric with the config.
        _preds, metric = model.evaluate(testing)
        hconf['metric'] = float(metric)
        # Compare against the PREVIOUS iteration's metric (appended below),
        # skipping the first two iterations.
        if itrs > 1 and has_converged(metric, metric_series[-1][1],
                                      optim['convergence']):
            log(f"Optimization has converged in {itrs} iterations")
            break
        metric_series.append((itrs, metric))
    # Rank by metric; 'max' criteria means the largest metric wins.
    ranking = sorted(metric_series,
                     key=lambda s: s[1],
                     reverse=(optim['metric']['criteria'] == 'max'))
    best_model = hcfgs[ranking[0][0]]
    log("Best performed model:\n", pformat(best_model))
    cv_results_path = (Path(cfg['mdl_file']).parent /
                       f'single_cv-{uuid.uuid4()}.json').as_posix()
    with open(cv_results_path, 'w') as f:
        f.write(json.dumps(hcfgs))
Ejemplo n.º 9
0
 def log(self, info, logfile='trainlog.txt'):
     """Append *info* to *logfile* under ``self.args.savepath``.

     Thin wrapper over the module-level ``log`` helper (which this
     method name shadows inside its own body's scope at the class level).
     """
     log(info, logfilename=logfile, savepath=self.args.savepath)
Ejemplo n.º 10
0
        -----
        sc : pyspark.SparkContext
    """
    conf = SparkConf()\
        .setAppName(APP_NAME)\
        .setMaster("local[4]")\
        .set("spark.executor.memory", "4g")\
        .set("spark.executor.cores", "4")\
        .set("spark.driver.cores",  "2")\
        .set("spark.driver.memory", "2g")
    sc = SparkContext(conf=conf)
    return sc


if __name__ == '__main__':
    # Entry point: load a trained model and run evaluation on a test env.
    log(f"Starting {APP_NAME} evaluation ...")
    # NOTE(review): 'predit' looks like a typo for 'predict' in the helper
    # name — defined elsewhere in the project, so left as-is.
    args = parse_predit_args()
    # load config
    cfg = load_conf()
    log(f"Using {cfg['class']}")
    # create spark
    sc = create_spark()
    st_time = time.time()
    # Load testing data
    testing = read_env(sc, args['test_file'])
    # Init model
    model = models[cfg['class']](sc, cfg)
    # Load model  and eval
    model.load_model()
    model.evaluate(testing)
    log(f"Finished predicting in {time.time() - st_time}")
Ejemplo n.º 11
0
        -----
        sc : pyspark.SparkContext
    """
    conf = SparkConf()\
        .setAppName(APP_NAME)\
        .setMaster("local[4]")\
        .set("spark.executor.memory", "4g")\
        .set("spark.executor.cores", "4")\
        .set("spark.driver.cores",  "2")\
        .set("spark.driver.memory", "2g")
    sc = SparkContext(conf=conf)
    return sc


if __name__ == '__main__':
    # Entry point: build environments from config, train the model, save it.
    log(f"Starting {APP_NAME} training ...")
    st_time = time.time()
    # load config
    cfg = load_conf()
    log(f"Using {cfg['class']}")
    # create spark
    sc = create_spark()
    # Load environment configuration — note training and evaluation are
    # both built from the same cfg['environment'] entry.
    training = read_env(sc, cfg['environment'])
    evaluation = read_env(sc, cfg['environment'])
    # Init model
    model = models[cfg['class']](sc, cfg)
    # Start training
    model.train(training, evaluation)
    model.save()
    log(f"Finished training in {time.time()- st_time }")
Ejemplo n.º 12
0
from utils.files import data_filename as fn
from utils.misc import log
from schedule.parser import text2schedule as parse_schedule
from driver.extract import get_courses_and_classes_data, \
    get_students, get_applications, get_parameters
from driver.index import create_classes_and_courses_indexes, \
    save_classes_and_courses, save_criteria, \
    create_criteria_indexes, create_parameters_index, \
    save_parameters, create_students_index, save_students

# CLI: execute.py <max_search> <default_parameter>
# NOTE(review): assert-based arg validation is stripped under `python -O`;
# an explicit check with sys.exit would be more robust.
assert len(sys.argv) == 3, 'execute.py  <max_search>  <default_parameter>'
max_search = int(sys.argv[1])
default_parameter = int(sys.argv[2])

# COURSES AND CLASSES ("disciplinas e turmas")
log('processando disciplinas e turmas...')
classes_of_course, classes, criteria_list = get_courses_and_classes_data()
course_codes_list = list(classes_of_course.keys())
save_classes_and_courses(course_codes_list, classes_of_course)
save_criteria(criteria_list)
criteria_expr2index, index2criteria_expr = create_criteria_indexes(
    criteria_list)

# Write one "<course index>:<class count>" line per course.
with open(fn('disciplinas'), 'w') as f:
    for i, code in enumerate(course_codes_list):
        n = len(classes_of_course[code])
        f.write(f'{i}:{n}\n')

with open(fn('turmas'), 'w') as f:
    for i, code in enumerate(course_codes_list):
        for j, class_code in enumerate(classes_of_course[code]):
Ejemplo n.º 13
0
        -----
        sc : pyspark.SparkContext
    """
    conf = SparkConf()\
        .setAppName(APP_NAME+"-"+optim_name)\
        .setMaster(f"local[{exec_workers}]")\
        .set("spark.executor.memory", "4g")\
        .set("spark.executor.cores", f"{exec_workers}")\
        .set("spark.driver.cores",  "1")\
        .set("spark.driver.memory", "2g")
    sc = SparkContext(conf=conf)
    return sc


if __name__ == '__main__':
    log(f"Starting {APP_NAME} optimization ...")
    # read arguments
    args = parse_args()
    with open(args.optim_config, 'r') as f:
        optconfig = json.load(f)
    # load config
    cfg = load_conf(optconfig['config'])
    log(f"Using {cfg['class']}")
    # create spark
    sc = create_spark(optconfig['optim_name'], optconfig['num_workers'])
    st_time = time.time()
    # Load environment configuration
    training = read_env(sc, cfg['environment'])
    testing = read_env(sc, cfg['environment'], max_episode_steps=1000)
    # Run CV
    if args.parallelized: