Example No. 1
import io
from typing import Dict

import matplotlib.pyplot as plt
from comet_ml import Experiment
from PIL import Image


class CometMLLogger:
    def __init__(self):
        self.experiment = Experiment(api_key="iU4f44llKnowZwmrEo9wfR2ch",
                                     project_name="general",
                                     workspace="yahyaalaamassoud",
                                     log_code=False,
                                     log_graph=False)

    def log_params(self, params: Dict[str, int]):
        self.experiment.log_parameters(params)

    def log_metric(self, metric_name, metric_val, step=None):
        self.experiment.log_metric(metric_name, metric_val, step=step)

    def log_metrics(self, metrics: Dict[str, float], step=None):
        self.experiment.log_metrics(metrics, step=step)

    def log_figure(self, figure_name: str, step: int):
        self.experiment.log_image(image_data=self.__savefig(),
                                  name=figure_name,
                                  step=step,
                                  overwrite=False)

    def __savefig(self):
        buf = io.BytesIO()
        plt.savefig(buf, format='png')
        buf.seek(0)
        return Image.open(io.BytesIO(buf.getvalue()))
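
A minimal usage sketch for the class above (assuming the imports shown and a valid Comet API key; the parameter and metric names are only illustrative):

logger = CometMLLogger()
logger.log_params({"batch_size": 32, "epochs": 10})
for step in range(10):
    logger.log_metric("train_loss", 1.0 / (step + 1), step=step)

# log_figure() captures whatever is currently drawn on the active matplotlib figure
plt.plot(range(10), [1.0 / (s + 1) for s in range(10)])
logger.log_figure("loss_curve", step=9)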
Example No. 2
def fit_validate(exp_params, k, data_path, write_path, others=None, custom_tag=''):
    """Fit model and compute metrics on train and validation set. Intended for hyperparameter search.

    Only logs final metrics and scatter plot of final embedding.

    Args:
        exp_params(dict): Parameter dict. Should at least have keys model_name, dataset_name & random_state. Other
        keys are assumed to be model parameters.
        k(int): Fold identifier.
        data_path(str): Data directory.
        write_path(str): Where to write temp files.
        others(dict): Other things to log to Comet experiment.
        custom_tag(str): Custom tag for comet experiment.

    """
    # Comet experiment
    exp = Experiment(parse_args=False)
    exp.disable_mp()
    custom_tag += '_validate'
    exp.add_tag(custom_tag)
    exp.log_parameters(exp_params)

    if others is not None:
        exp.log_others(others)

    # Parse experiment parameters
    model_name, dataset_name, random_state, model_params = parse_params(exp_params)

    # Fetch and split dataset.
    data_train = getattr(grae.data, dataset_name)(split='train', random_state=random_state, data_path=data_path)
    data_train, data_val = data_train.validation_split(random_state=FOLD_SEEDS[k])

    # Model
    m = getattr(grae.models, model_name)(random_state=FOLD_SEEDS[k], **model_params)
    m.write_path = write_path
    m.data_val = data_val

    with exp.train():
        m.fit(data_train)

        # Log plot
        m.comet_exp = exp
        m.plot(data_train, data_val, title=f'{model_name} : {dataset_name}')

        # Probe embedding
        prober = EmbeddingProber()
        prober.fit(model=m, dataset=data_train, mse_only=True)
        train_z, train_metrics = prober.score(data_train, is_train=True)

        # Log train metrics
        exp.log_metrics(train_metrics)

    with exp.validate():
        val_z, val_metrics = prober.score(data_val)

        # Log validation metrics
        exp.log_metrics(val_metrics)

    # Log marker to mark successful experiment
    exp.log_other('success', 1)
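
The exp.train() and exp.validate() blocks above are Comet context managers that prefix the names of everything logged inside them; a stripped-down sketch (metric values are placeholders):

exp = Experiment(parse_args=False)
with exp.train():
    exp.log_metrics({"mse": 0.12})   # recorded as train_mse
with exp.validate():
    exp.log_metrics({"mse": 0.20})   # recorded as validate_mse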
Example No. 3
def log_metrics(metrics: dict, comet_logger: Experiment, epoch: int,
                context_val: bool):
    if context_val:
        with comet_logger.validate():
            comet_logger.log_metrics(metrics, epoch=epoch)
    else:
        with comet_logger.train():
            comet_logger.log_metrics(metrics, epoch=epoch)
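
Hypothetical call sites for the helper above (assuming comet_logger is an existing comet_ml Experiment; metric names and epoch are made up):

log_metrics({"loss": 0.31, "acc": 0.88}, comet_logger, epoch=5, context_val=False)  # train context
log_metrics({"loss": 0.44, "acc": 0.81}, comet_logger, epoch=5, context_val=True)   # validation context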
Example No. 4
def generate_categories():
    # capture the config path from the run arguments then process the json configuration file
    try:
        args = get_args()
        config = process_config(args.config)
    except ValueError:
        print("Missing or invalid arguments")
        exit(0)

    print("Logging experiment name: {name}".format(
        name=config.experiment.experiment_name))
    experiment = Experiment(api_key=config.experiment.api_key,
                            project_name=config.experiment.project_name,
                            workspace=config.experiment.workspace)
    experiment.set_name(config.experiment.experiment_name)

    print('Creating the data loader...')
    data_loader = DataLoader(config.defects_summarizer.paths)
    train_data, test_data = data_loader.get_data()

    print('Creating the Preprocessor...')
    preprocessor = CorexPreprocessor(train_data, config)
    preprocessor.prepare_data()

    print('Loading and evaluating the Model...')
    model = CorexModel(config.defects_summarizer, preprocessor, seed=False)
    trainer = CorexTrainer(model, preprocessor.get_data())
    trainer.train()
    trainer.generate_topics()
    top_docs_df = trainer.get_top_documents(
        config.defects_summarizer.evaluate.extract_topics,
        preprocessor.get_raw_corpus(),
        config.defects_summarizer.evaluate.extraction_quantile,
        labels=True)
    top_docs_df.to_csv(config.defects_summarizer.paths.save_data_path)

    print('Saving the trained topic model...')
    model.save()

    print('Preprocessing the summarizer...')
    summary_preprocessor = TextRankPreprocessor(
        top_docs_df, n_docs=config.defects_summarizer.evaluate.n_docs)
    summary_preprocessor.prepare_data()

    print('Loading and evaluating the summarizer...')
    summary_model = TextRankModel(config)
    summary_trainer = TextRankTrainer(summary_model, summary_preprocessor)
    avg_prec, avg_recall, avg_f1 = summary_trainer.train_and_evaluate(
        test_data)

    # Log the rest of the experiment
    metrics = {"precision": avg_prec, "recall": avg_recall, "f1": avg_f1}
    experiment.log_metrics(metrics)

    experiment.log_model(
        name=config.experiment.model_name,
        file_or_folder=config.labels_generator.paths.save_model_path)
Example No. 5
 def pretrain(self, save_filename, patience):
     ''' Train & validate the model on full training dataset (all pilots) from scratch. 
     
         Inputs:
             - save_filename: (string: 'weights_save_filename.h5').
             - patience: Number of epochs without improvement before training stops (int).
         Output:
             - hist: Contains loss, acc, val_loss, val_acc metrics over epochs (dictionary).
                 
     '''
     X_train, y_train, X_valid, y_valid, X_test, y_test = [], [], [], [], [], []
     
     if self.log_pilots:
         experiment = Experiment(api_key='cSZq9kuH2I87ezvm2dEWTx6op', project_name='All pilots', log_code=False, auto_param_logging=False)
     
     for pilot_idx in range(1, self.n_pilots + 1):
         X_train_pilot, y_train_pilot, X_valid_pilot, y_valid_pilot, X_test_pilot, y_test_pilot = self.load_data(pilot_idx, valid_ratio=0.2)
         
         # Stacking training/validation datasets of each pilot into a big dataset
         if len(X_train)==0 and len(X_valid)==0:
             X_train = X_train_pilot
             y_train = y_train_pilot
             X_valid = X_valid_pilot
             y_valid = y_valid_pilot
             X_test = X_test_pilot
             y_test = y_test_pilot
         else:        
             X_train = np.concatenate([X_train, X_train_pilot], axis=0)
             y_train = np.concatenate([y_train, y_train_pilot], axis=0)
             X_valid = np.concatenate([X_valid, X_valid_pilot], axis=0)
             y_valid = np.concatenate([y_valid, y_valid_pilot], axis=0)
             X_test = np.concatenate([X_test, X_test_pilot], axis=0)
             y_test = np.concatenate([y_test, y_test_pilot], axis=0)
             
     # Shuffle
     X_train, y_train = shuffle(X_train, y_train, random_state=0)
     X_valid, y_valid = shuffle(X_valid, y_valid, random_state=0)
     X_test, y_test = shuffle(X_test, y_test, random_state=0)
     
     # Build model
     self.model = self.build_model(load_weights=False)
     
     # Train on all pilots
     hist, _ = self.train(X_train, y_train, X_valid, y_valid, save_filename, patience)
     
     # Get some metrics
     score = self.test('all', save_filename, X_train, y_train, X_test, y_test, False)
     
     if self.log_pilots:
         experiment.log_metrics({'Test accuracy' : score})
         experiment.end()
     
     return hist
Example No. 6
def generate_topics():
    # capture the config path from the run arguments then process the json configuration file
    try:
        args = get_args()
        config = process_config(args.config)
    except ValueError:
        print("Missing or invalid arguments")
        exit(0)

    print("Logging experiment name: {name}".format(
        name=config.experiment.experiment_name))
    experiment = Experiment(api_key=config.experiment.api_key,
                            project_name=config.experiment.project_name,
                            workspace=config.experiment.workspace)
    experiment.set_name(config.experiment.experiment_name)
    params = config.labels_generator.model
    experiment.log_parameters(params)

    print('Creating the data loader...')
    data_loader = DataLoader(config.labels_generator.paths)
    data = data_loader.get_data()

    print('Creating the Preprocessor...')
    preprocessor = CorexPreprocessor(data, config)
    preprocessor.prepare_data()

    print('Creating and training the Model...')
    model = CorexModel(config, preprocessor)
    trainer = CorexTrainer(model, preprocessor.get_data())
    trainer.train()

    print('Evaluating the model...')
    coherence_lst, avg_coherence = trainer.evaluate(preprocessor.get_data(),
                                                    preprocessor.get_corpus())
    trainer.generate_topics()
    print("Coherence score: {score_lst} \nAvg coherence score: {avg_score}".
          format(score_lst=coherence_lst, avg_score=avg_coherence))

    print('Saving the trained model...')
    model.save()

    # Log the rest of the experiment
    metrics = {"coherence": avg_coherence}
    experiment.log_metrics(metrics)

    experiment.log_model(
        name=config.experiment.model_name,
        file_or_folder=config.labels_generator.paths.save_model_path)
Example No. 7
class CometLogger(LightningLoggerBase):
    def __init__(self, *args, **kwargs):
        super(CometLogger, self).__init__()
        self.experiment = CometExperiment(*args, **kwargs)

    @rank_zero_only
    def log_hyperparams(self, params):
        self.experiment.log_parameters(vars(params))

    @rank_zero_only
    def log_metrics(self, metrics, step_num):
        # self.experiment.set_epoch(self, metrics.get('epoch', 0))
        self.experiment.log_metrics(metrics)

    @rank_zero_only
    def finalize(self, status):
        self.experiment.end()
Example No. 8
class Logger:
    def __init__(self, send_logs, tags, parameters):
        self.send_logs = send_logs
        if self.send_logs:
            self.experiment = Experiment(api_key="OZwyhJHyqzPZgHEpDFL1zxhyI",
                                         project_name="drilling-the-hole",
                                         workspace="wwydmanski")
        self.sent_mb = 0

        if self.send_logs:
            if tags is not None:
                self.experiment.add_tags(tags)
            if parameters is not None:
                self.experiment.log_parameters(parameters)

    def begin_logging(self, episode_count, steps_per_ep):
        if self.send_logs:
            self.experiment.log_parameter("Episode count", episode_count)
            self.experiment.log_parameter("Max steps per episode",
                                          steps_per_ep)

    def log_round(self, actions, reward, cumulative_reward, angle, loss, step):
        if self.send_logs:
            self.experiment.log_metric("Round reward", reward, step=step)
            self.experiment.log_metric("Per-ep reward",
                                       cumulative_reward,
                                       step=step)
            self.experiment.log_metric("Action 1", actions[0], step=step)
            self.experiment.log_metric("Action 2", actions[1], step=step)
            self.experiment.log_metric("Current angle", angle, step=step)

            self.experiment.log_metrics(loss, step=step)

    def log_episode(self, cumulative_reward, state, step):
        if self.send_logs:
            self.experiment.log_metric("Angle", state[0], step=step)
            self.experiment.log_metric("Goal", state[1], step=step)
            self.experiment.log_metric("Cumulative reward",
                                       cumulative_reward,
                                       step=step)

    def end(self):
        if self.send_logs:
            self.experiment.end()
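
A toy driver for the Logger above (tags, parameters, and the logged values are invented; with send_logs=False every Comet call is skipped):

logger = Logger(send_logs=True, tags=["drill"], parameters={"gamma": 0.99})
logger.begin_logging(episode_count=10, steps_per_ep=100)
for step in range(100):
    logger.log_round(actions=[0.1, -0.2], reward=1.0, cumulative_reward=float(step),
                     angle=0.05, loss={"actor_loss": 0.3, "critic_loss": 0.7}, step=step)
logger.log_episode(cumulative_reward=100.0, state=[0.05, 0.0], step=0)
logger.end()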
Example No. 9
 def _log_final_metrics(self, experiment: Experiment, model: Sequential,
                        data: DataSets,
                        preprocessing_fnc: Callable[[np.ndarray],
                                                    np.ndarray]):
     scores = model.evaluate(preprocessing_fnc(data.x_train),
                             data.y_train,
                             verbose=2)
     experiment.log_metrics({
         "loss": scores[0],
         "acc": scores[1]
     },
                            prefix="train")
     scores = model.evaluate(preprocessing_fnc(data.x_dev),
                             data.y_dev,
                             verbose=2)
     experiment.log_metrics({
         "loss": scores[0],
         "acc": scores[1]
     },
                            prefix="dev")
     timer = ElapsedTime("Test prediction")
     with timer:
         scores = model.evaluate(preprocessing_fnc(data.x_test),
                                 data.y_test,
                                 verbose=2)
     experiment.log_metric("test_inference_time", timer.elapsed_time_ms)
     experiment.log_metrics({
         "loss": scores[0],
         "acc": scores[1]
     },
                            prefix="test")
Example No. 10
class BaseLogger:
    def __init__(self, log_interval, train_len):
        self.log_interval = log_interval
        self.train_len = train_len
        self.metrics = {}
        self.__cometml_api_key = os.environ.get("COMETML_API_KEY")
        if self.__cometml_api_key:
            self.experiment = Experiment(self.__cometml_api_key,
                                         project_name="sirius-adversarial-attack")

    def log_test(self, engine):
        self.metrics = engine.state.metrics
        logging.info('----------------------------------------')
        logging.info(f'TEST: Epoch:[{engine.state.epoch}]')
        logging.info(f', '.join([
            f'{name}: {self.metrics[name]:.5f}' for name in self.metrics
        ]))
        logging.info(f'----------------------------------------')
        if self.__cometml_api_key:
            self.experiment.log_metrics(self.metrics, prefix='Test')

    def log_train(self, engine):
        if engine.state.iteration % self.train_len % self.log_interval == 0:
            logging.info("Epoch[{}] Iteration[{}/{}] Loss: {:.5f}"
                         .format(engine.state.epoch, engine.state.iteration %
                                 self.train_len if engine.state.iteration
                                 % self.train_len else self.train_len,
                                 self.train_len, engine.state.output))
            if self.__cometml_api_key:
                logs_dict = {"loss": engine.state.output}
                self.experiment.log_metrics(logs_dict,
                                            step=engine.state.iteration,
                                            epoch=engine.state.epoch,
                                            prefix='Train')

    def log_hparams(self, hparams_dict):
        hparams_dict.update(self.metrics)
        if self.__cometml_api_key:
            self.experiment.log_parameters(hparams_dict)
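
The log_test and log_train hooks above read engine.state, which matches pytorch-ignite's Engine; a sketch of one possible wiring under that assumption (trainer, evaluator, and train_loader are hypothetical objects):

from ignite.engine import Events

base_logger = BaseLogger(log_interval=10, train_len=len(train_loader))
trainer.add_event_handler(Events.ITERATION_COMPLETED, base_logger.log_train)
evaluator.add_event_handler(Events.EPOCH_COMPLETED, base_logger.log_test)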
Example No. 11
class CometLogger(BaseLogger):
    def __init__(self, experiment_id=None):
        self.experiment = Experiment(auto_metric_logging=False)
        if experiment_id is not None:
            self.experiment.log_parameter('experiment_id', experiment_id)

    def add_scalar(self, name, value, step):
        self.experiment.log_metric(name, value, epoch=step)

    def log_parameters(self, params_dict):
        self.experiment.log_parameters(params_dict)

    def log_metrics(self, metrics_dict, epoch):
        self.experiment.log_metrics(metrics_dict, epoch=epoch)

    def add_text(self, name, text):
        self.experiment.log_text(f'{name}: {text}')

    def set_context_prefix(self, prefix):
        self.experiment.context = prefix

    def reset_context_prefix(self):
        self.experiment.context = None
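
A brief sketch of the context-prefix helpers above (the experiment_id, parameter, and metric names are placeholders):

comet = CometLogger(experiment_id="run-42")
comet.log_parameters({"lr": 3e-4})
comet.set_context_prefix("train")
comet.log_metrics({"loss": 0.42}, epoch=1)
comet.reset_context_prefix()
comet.add_text("note", "baseline configuration")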
Example No. 12
 def log(self, experiment=None):
     ''' Export all logs in the Comet.ml environment.
         See https://www.comet.ml/ for more details
     '''
     
     # Initialize Comet.ml experiment (naming, tags) for automatic logging
     project_name = 'Optimization' if self.comet_optimize else 'Summary'
     experiment_name = '{} - {} '.format(self.model_name, str(self.batch_size)) + ('ES+' if self.train_after_es else '')
     experiment_tags = [ self.model_name, self.monitor_val ] + (['ES+'] if self.train_after_es else []) +  (['Pre-train'] if self.pretraining else [])
     
     if experiment is None:
         experiment = Experiment(api_key='cSZq9kuH2I87ezvm2dEWTx6op', project_name=project_name, log_code=False, auto_param_logging=False, auto_metric_logging=False)
     experiment.set_name(experiment_name)
     experiment.add_tags(experiment_tags)
     
     # Export hyperparameters
     experiment.log_parameters(self.dataloader_params)
     experiment.log_parameters(self.training_params)   
     
     # Export metrics values
     experiment.log_metrics({'Average accuracy' : np.mean(self.test_score['accuracy']), 'Std accuracy' : np.std(self.test_score['accuracy'])})
     
     # Export metrics graphs for each pilot (accuracy, loss, confusion matrix)
     [ experiment.log_figure(figure_name='Confusion matrix {}'.format(pilot_idx), figure=plot_cm(self.conf_matrices, pilot_idx)) for pilot_idx in range(1,self.n_pilots+1)]
     [ experiment.log_figure(figure_name='Loss pilot {}'.format(pilot_idx), figure=plot_loss(self.histories[pilot_idx-1], pilot_idx)) for pilot_idx in range(1,self.n_pilots+1)]
     
     fig, ax = plt.subplots(figsize=(10,6))
     plot_full_barchart(self.test_score, n_pilots=self.n_pilots, title=' {} ConvNet model'.format(self.model_name), fig=fig)
     experiment.log_figure(figure_name='Accuracy barchart', figure=fig)
     
     if self.train_after_es:
         [ experiment.log_figure(figure_name='Loss pilot {} (ES+)'.format(pilot_idx), figure=plot_loss(self.histories_es[pilot_idx-1], pilot_idx)) for pilot_idx in range(1,self.n_pilots+1)]
     
     # Export model weights for each pilot
     [ experiment.log_asset('{}{}.h5'.format(self.weights_savename_prefix, pilot_idx)) for pilot_idx in range(1,self.n_pilots+1)]
     experiment.end()
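
For reference, the figure and asset logging used above boils down to calls like the following sketch (assumes matplotlib is imported as plt; the figure content and the weights file name are placeholders, and log_asset expects an existing file on disk):

fig, ax = plt.subplots()
ax.plot([0, 1, 2], [1.0, 0.6, 0.4])
experiment.log_figure(figure_name='Loss pilot 1', figure=fig)
experiment.log_asset('weights_save_filename_1.h5')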
Example No. 13
class CometMLLogger(ExperimentLogger):
    def __init__(self, provider_args: EasyDict, config, **kwargs):
        self.experiment = Experiment(api_key=provider_args.api_key,
                                     project_name=provider_args.project_name,
                                     workspace=provider_args.workspace,
                                     auto_param_logging=False,
                                     auto_metric_logging=False)
        super().__init__(config)
        self.run_key = self.experiment.get_key()
        self.log_url = self.experiment.url

    def log_on_hyperparameters(self, config: EasyDict):
        hyper_params = {}
        if config is not None:
            hyper_params['model'] = config.model
            hyper_params['trainer'] = config.trainer
            if 'train' in config.dataset and 'augmentations' in config.dataset.train:
                hyper_params[
                    'augmentations'] = config.dataset.train.augmentations
        self.experiment.log_parameters(flatten(hyper_params, reducer='path'))

    def log_on_step_update(self, metrics_log: dict):
        step = metrics_log['step']
        metrics_log.pop('step')
        self.experiment.log_metrics(metrics_log, step=step)

    def log_on_epoch_update(self, metrics_log: dict):
        epoch = metrics_log['epoch']
        metrics_log.pop('epoch')
        self.experiment.log_metrics(metrics_log, epoch=epoch)

    def log_on_model_save(self, file_log: dict):
        pass

    def log_on_validation_result(self, metrics_log: dict):
        epoch = metrics_log['epoch']
        metrics_log.pop('epoch')
        self.experiment.log_metrics(metrics_log, epoch=epoch)
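
An illustrative pair of calls for the step/epoch hooks above (assuming comet_logger is an already-constructed CometMLLogger; the metric dictionaries are invented, and the 'step'/'epoch' keys are popped before being forwarded to Comet):

comet_logger.log_on_step_update({"step": 120, "train/loss": 0.31})
comet_logger.log_on_epoch_update({"epoch": 3, "train/accuracy": 0.87})
comet_logger.log_on_validation_result({"epoch": 3, "val/accuracy": 0.84})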
Example No. 14
                    g_opt_op, g_current_loss_tensor = g_train_optimizer_ops[sizes.index(current_resolution)]
                elif current_mode == 'stabilize':
                    d_opt_op, d_current_loss_tensor = d_stabilize_optimizer_ops[sizes.index(current_resolution)]
                    g_opt_op, g_current_loss_tensor = g_stabilize_optimizer_ops[sizes.index(current_resolution)]

                for i in range(d_steps):
                    try:
                        _, D_l = session.run([d_opt_op, d_current_loss_tensor])
                    except tf.errors.InvalidArgumentError as e:
                        print('jpeg error: ' + str(e))

                for i in range(g_steps):
                    _, G_l = session.run([g_opt_op, g_current_loss_tensor])

                if step % 10 == 0:
                    experiment.log_metrics({'d_loss': D_l, 'g_loss': G_l, 'current_resolution': current_resolution,
                                            'current_mode': (0 if current_mode == 'train' else 1), 'time_to_res_schedule_update': current_resolution_schedule_period_length - (time.time() - last_schedule_update_time)})
                    # experiment.log_metric("d_loss", D_l)
                    # experiment.log_metric("g_loss", G_l)
                    # experiment.log_metric("current_resolution", current_resolution)
                    # experiment.log_metric("current_mode", 0 if current_mode == 'train' else 1)

                if np.isnan(D_l) or np.isnan(G_l):
                    print('loss is NaN.')
                    exit()

                if step % 1000 == 0:
                    print('epoch: {} step: {} G_loss: {} D_loss: {}'.format(epoch, step, G_l, D_l))
                    # Save figure

                    sampled_images = session.run([samples_for_all_resolutions[sizes.index(current_resolution)]])[0]
Example No. 15
    def train(self):

        # comet_ml
        # Create an experiment
        experiment = Experiment(api_key="B6hzNydshIpZSG2Xi9BDG9gdG",
                                project_name="glow-mnist", workspace="voletiv")
        hparams_dict = self.hparams_dict()
        experiment.log_parameters(hparams_dict)

        # set to training state
        self.graph.train()
        self.global_step = self.loaded_step

        # begin to train
        for epoch in range(self.n_epoches):
            print("epoch", epoch)
            progress = tqdm(self.data_loader)
            for i_batch, batch in enumerate(progress):

                experiment.set_step(self.global_step)

                # update learning rate
                lr = self.lrschedule["func"](global_step=self.global_step,
                                             **self.lrschedule["args"])
                for param_group in self.optim.param_groups:
                    param_group['lr'] = lr
                self.optim.zero_grad()

                # log
                if self.global_step % self.scalar_log_gaps == 0:
                    # self.writer.add_scalar("lr/lr", lr, self.global_step)
                    experiment.log_metrics({"lr": lr, "epoch": epoch+i_batch/len(self.data_loader)})

                # get batch data
                for k in batch:
                    batch[k] = batch[k].to(self.data_device)
                x = batch["x"]
                y = None
                y_onehot = None
                if self.y_condition:
                    if self.y_criterion == "multi-classes":
                        assert "y_onehot" in batch, "multi-classes ask for `y_onehot` (torch.FloatTensor onehot)"
                        y_onehot = batch["y_onehot"]
                    elif self.y_criterion == "single-class":
                        assert "y" in batch, "single-class ask for `y` (torch.LongTensor indexes)"
                        y = batch["y"]
                        y_onehot = thops.onehot(y, num_classes=self.y_classes)

                # at first time, initialize ActNorm
                if self.global_step == 0:
                    self.graph(x[:self.batch_size // len(self.devices), ...],
                               y_onehot[:self.batch_size // len(self.devices), ...] if y_onehot is not None else None)

                # parallel
                if len(self.devices) > 1 and not hasattr(self.graph, "module"):
                    print("[Parallel] move to {}".format(self.devices))
                    self.graph = torch.nn.parallel.DataParallel(self.graph, self.devices, self.devices[0])

                # forward phase
                z, nll, y_logits = self.graph(x=x, y_onehot=y_onehot)

                # loss_generative
                loss_generative = Glow.loss_generative(nll)

                # loss_classes
                loss_classes = 0
                if self.y_condition:
                    loss_classes = (Glow.loss_multi_classes(y_logits, y_onehot)
                                    if self.y_criterion == "multi-classes" else
                                    Glow.loss_class(y_logits, y))

                # total loss
                loss = loss_generative + loss_classes * self.weight_y

                # log
                if self.global_step % self.scalar_log_gaps == 0:
                    # self.writer.add_scalar("loss/loss_generative", loss_generative, self.global_step)
                    experiment.log_metrics({"loss_generative": loss_generative})
                    if self.y_condition:
                        # self.writer.add_scalar("loss/loss_classes", loss_classes, self.global_step)
                        experiment.log_metrics({"loss_classes": loss_classes, "total_loss": loss})

                # backward
                self.graph.zero_grad()
                self.optim.zero_grad()
                loss.backward()

                # operate grad
                if self.max_grad_clip is not None and self.max_grad_clip > 0:
                    torch.nn.utils.clip_grad_value_(self.graph.parameters(), self.max_grad_clip)
                if self.max_grad_norm is not None and self.max_grad_norm > 0:
                    grad_norm = torch.nn.utils.clip_grad_norm_(self.graph.parameters(), self.max_grad_norm)
                    if self.global_step % self.scalar_log_gaps == 0:
                        # self.writer.add_scalar("grad_norm/grad_norm", grad_norm, self.global_step)
                        experiment.log_metrics({"grad_norm": grad_norm})

                # step
                self.optim.step()

                # checkpoints
                if self.global_step % self.checkpoints_gap == 0 and self.global_step > 0:
                    save(global_step=self.global_step,
                         graph=self.graph,
                         optim=self.optim,
                         pkg_dir=self.checkpoints_dir,
                         is_best=True,
                         max_checkpoints=self.max_checkpoints)

                # plot images
                if self.global_step % self.plot_gaps == 0:
                    img = self.graph(z=z, y_onehot=y_onehot, reverse=True)
                    # img = torch.clamp(img, min=0, max=1.0)

                    if self.y_condition:
                        if self.y_criterion == "multi-classes":
                            y_pred = torch.sigmoid(y_logits)
                        elif self.y_criterion == "single-class":
                            y_pred = thops.onehot(torch.argmax(F.softmax(y_logits, dim=1), dim=1, keepdim=True),
                                                  self.y_classes)
                        y_true = y_onehot

                    # plot images
                    # self.writer.add_image("0_reverse/{}".format(bi), torch.cat((img[bi], batch["x"][bi]), dim=1), self.global_step)
                    vutils.save_image(torch.stack([torch.cat((img[bi], batch["x"][bi]), dim=1) for bi in range(min([len(img), self.n_image_samples]))]), '/tmp/vikramvoleti.png', nrow=10)
                    experiment.log_image('/tmp/vikramvoleti_rev.png', file_name="0_reverse")

                    # plot preds
                    # for bi in range(min([len(img), self.n_image_samples])):
                    #     # wandb.log({"0_reverse_{}".format(bi): [wandb.Image(torch.cat((img[bi], batch["x"][bi]), dim=1), caption="0_reverse/{}".format(bi))]}, step=self.global_step)
                    #     if self.y_condition:
                    #         # self.writer.add_image("1_prob/{}".format(bi), plot_prob([y_pred[bi], y_true[bi]], ["pred", "true"]), self.global_step)
                    #         wandb.log({"1_prob_{}".format(bi): [wandb.Image(plot_prob([y_pred[bi], y_true[bi]], ["pred", "true"]))]}, step=self.global_step)

                # inference
                if hasattr(self, "inference_gap"):
                    if self.global_step % self.inference_gap == 0:
                        try:
                            img = self.graph(z=None, y_onehot=inference_y_onehot, eps_std=0.5, reverse=True)
                        except NameError:
                            inference_y_onehot = torch.zeros_like(y_onehot, device=torch.device('cpu'))
                            for i in range(inference_y_onehot.size(0)):
                                inference_y_onehot[i, (i % inference_y_onehot.size(1))] = 1.
                            # now
                            inference_y_onehot = inference_y_onehot.to(y_onehot.device)
                            img = self.graph(z=None, y_onehot=inference_y_onehot, eps_std=0.5, reverse=True)
                        # grid
                        vutils.save_image(img[:min([len(img), self.n_image_samples])], '/tmp/vikramvoleti.png', nrow=10)
                        experiment.log_image('/tmp/vikramvoleti_sam.png', file_name="1_samples")
                        # img = torch.clamp(img, min=0, max=1.0)
                        # for bi in range(min([len(img), n_images])):
                        #     # self.writer.add_image("2_sample/{}".format(bi), img[bi], self.global_step)
                        #     wandb.log({"2_sample_{}".format(bi): [wandb.Image(img[bi])]}, step=self.global_step)

                if self.global_step == 0:
                    subprocess.run('nvidia-smi')

                # global step
                self.global_step += 1
Example No. 16
            parameters['launch_epoch'] = epoch
            disable_flag = 1
            sample_count = len(train_batched)

    else:
        if save:
            torch.save(model.state_dict(), model_name)
            best_idx = epoch

    best_test_F, new_test_F, _ = evaluating_batch(model, test_batched,
                                                  best_test_F)

    all_F.append([0.0, new_dev_F, new_test_F])

    sys.stdout.flush()
    print('Epoch %d : train/dev/test : %.2f / %.2f / %.2f - %d' %
          (epoch, new_train_F, new_dev_F, new_test_F, best_idx))
    model.train(True)
    adjust_learning_rate(optimizer,
                         lr=learning_rate /
                         (1 + 0.05 * sample_count / len(train_data)))

    metrics['new_train_F'] = new_train_F
    metrics['new_test_F'] = new_test_F
    metrics['new_dev_F'] = new_dev_F

    experiment.log_metrics(metrics)
    experiment.set_step(epoch + 1)

print(time.time() - t)
Example No. 17
def train(args, use_comet : bool = True):

    data_cls = funcs[args['dataset']]
    model_cls = funcs[args['model']]
    network = funcs[args['network']]

    print ('[INFO] Getting dataset...')
    data = data_cls()
    data.load_data()
    (x_train, y_train), (x_test, y_test) = (data.x_train, data.y_train), (data.x_test, data.y_test)
    classes = data.mapping
    
    # #Used for testing only
    # x_train = x_train[:100, :, :]
    # y_train = y_train[:100, :]
    # x_test = x_test[:100, :, :]
    # y_test = y_test[:100, :]
    # print ('[INFO] Training shape: ', x_train.shape, y_train.shape)
    # print ('[INFO] Test shape: ', x_test.shape, y_test.shape)
    # #delete these lines

    # hold out part of the test set as a validation split (80% test / 20% val)
    (x_test, x_valid, y_test, y_valid) = train_test_split(x_test, y_test, test_size=0.2, random_state=42)

    print ('[INFO] Training shape: ', x_train.shape, y_train.shape)
    print ('[INFO] Validation shape: ', x_valid.shape, y_valid.shape)
    print ('[INFO] Test shape: ', x_test.shape, y_test.shape)

    print ('[INFO] Setting up the model..')
    if args['network'] == 'lstmctc':
        network_args = {'backbone' : args['backbone'],
                        'seq_model' : args['seq'],
                        'bi' : args['bi']
                        }
        model = model_cls(network, data_cls, network_args)
    else:
        model = model_cls(network, data_cls)
    print (model)
    
    dataset = dict({
        'x_train' : x_train,
        'y_train' : y_train,
        'x_valid' : x_valid,
        'y_valid' : y_valid,
        'x_test' : x_test,
        'y_test' : y_test
    })

    if use_comet and args['find_lr'] == False:
        #create an experiment with your api key
        experiment = Experiment(api_key='WVBNRAfMLCBWslJAAsffxM4Gz',
                                project_name='iam_lines',
                                auto_param_logging=False)
        
        print ('[INFO] Starting Training...')
        #will log metrics with the prefix 'train_'   
        with experiment.train():
            _ = train_model(
                    model,
                    dataset,
                    batch_size=args['batch_size'],
                    epochs=args['epochs'],
                    name=args['network']
                    )

        print ('[INFO] Starting Testing...')    
        #will log metrics with the prefix 'test_'
        with experiment.test():  
            score = model.evaluate(dataset, int(args['batch_size']))
            print(f'[INFO] Test evaluation: {score*100}...')
            metrics = {
                'accuracy':score
            }
            experiment.log_metrics(metrics)    

        experiment.log_parameters(args)
        experiment.log_dataset_hash(x_train) #creates and logs a hash of your data 
        experiment.end()

    elif use_comet and args['find_lr'] == True:

        _ = train_model(
                    model,
                    dataset,
                    batch_size=args['batch_size'],
                    epochs=args['epochs'],
                    FIND_LR=args['find_lr'],
                    name=args['network']
                    )

    else :

        print ('[INFO] Starting Training...')
        train_model(
            model,
            dataset,
            batch_size=args['batch_size'],
            epochs=args['epochs'],
            name=args['network']
            )
        print ('[INFO] Starting Testing...')    
        score = model.evaluate(dataset, args['batch_size'])
        print(f'[INFO] Test evaluation: {score*100}...')

    if args['weights']:
        model.save_weights()
    
    if args['save_model']:
        model.save_model()
Example No. 18
def main(model_name, pose_refine, exp: Experiment):
    config, m = load_model(model_name)
    test_set = get_dataset(config)

    params_path = os.path.join(LOG_PATH, str(model_name),
                               "preprocess_params.pkl")
    transform = SaveableCompose.from_file(params_path, test_set, globals())
    test_set.transform = transform

    assert isinstance(transform.transforms[1].normalizer, MeanNormalize3D)
    normalizer3d = transform.transforms[1].normalizer

    post_process_func = get_postprocessor(config, test_set, normalizer3d)

    prefix = "R" if pose_refine else "NR"
    prefix = f"mupo_{prefix}"
    # logger = TemporalMupotsEvaluator(
    #     m,
    #     test_set,
    #     config["model"]["loss"],
    #     True,
    #     post_process3d=post_process_func,
    #     prefix="mupo_NR",
    #     orient_norm=None # config["orient_norm"]
    # )
    logger = TemporalTestEvaluator(
        m,
        test_set,
        config["model"]["loss"],
        True,
        post_process3d=post_process_func,
        prefix="mpi_NR",
        orient_norm=None  # config["orient_norm"]
    )
    logger.eval(calculate_scale_free=not pose_refine, verbose=not pose_refine)
    exp.log_metrics(logger.losses_to_log)

    # pred_3d = unstack_mpi3dhp_poses(test_set, logger)
    # print("\n%13s  R-PCK  R-AUC  A-PCK  A-AUC" % "")
    # print("%13s: " % "all poses", end="")
    # keys = ["R-PCK", "R-AUC", "A-PCK", "A-AUC"]
    # values = []
    # for relative in [True, False]:
    #     pcks, aucs = mpii_3dhp.eval_poses(
    #         relative,
    #         "annot3" if config["pose3d_scaling"] == "normal" else "univ_annot3",
    #         pred_3d,
    #     )
    #     pck = np.mean(list(pcks.values()))
    #     auc = np.mean(list(aucs.values()))
    #     values.append(pck)
    #     values.append(auc)

    #     print(" %4.1f   %4.1f  " % (pck, auc), end="")
    # print()
    # exp.log_metrics({f"{prefix}-{k}": v for k, v in zip(keys, values)})

    if pose_refine:
        refine_config = load("../models/pose_refine_config.json")
        pred = np.concatenate([logger.preds[i] for i in range(1, 21)])
        pred = optimize_poses(pred, test_set, refine_config)
        l = StackedArrayAllMupotsEvaluator(pred, test_set, True, prefix=prefix)
        l.eval(calculate_scale_free=True, verbose=True)
        exp.log_metrics(l.losses_to_log)

        pred_by_seq = {}
        for seq in range(1, 21):
            inds = test_set.index.seq_num == seq
            pred_by_seq[seq] = pred[inds]
        pred_2d, pred_3d = unstack_mupots_poses(test_set, pred_by_seq)
    else:
        pred_2d, pred_3d = unstack_mupots_poses(test_set, logger.preds)
        exp.log_metrics(logger.losses_to_log)

    print("\nR-PCK  R-AUC  A-PCK  A-AUC")
    keys = ["R-PCK", "R-AUC", "A-PCK", "A-AUC"]
    values = []
    for relative in [True, False]:
        pcks, aucs = mupots_3d.eval_poses(
            False,
            relative,
            "annot3"
            if config["pose3d_scaling"] == "normal" else "univ_annot3",
            pred_2d,
            pred_3d,
            keep_matching=True,
        )
        pck = np.mean(list(pcks.values()))
        auc = np.mean(list(aucs.values()))
        values.append(pck)
        values.append(auc)

        print(" %4.1f   %4.1f  " % (pck, auc), end="")
    print()
    exp.log_metrics({f"{prefix}-{k}": v for k, v in zip(keys, values)})
Example No. 19
    if t % valid_inc == 0:
        test(valid_dist_dataset, args.beta)
        step_time = round(time.time() - start_time, 1)

        metrics = {
            metric_name: log(metric)
            for metric_name, metric in trainer.metrics.items()
        }
        metrics['step_time'] = step_time

        # validation plotting
        progbar.add(valid_inc, [('Train Loss', metrics['train_loss']),
                                ('Validation Loss', metrics['valid_loss']),
                                ('Time (s)', step_time)])
        #Plot on Comet
        experiment.log_metrics(metrics, step=t)
        # Plot on WandB
        wandb.log(metrics, step=t)

    if (t + 1) % save_inc == 0:
        trainer.save_weights(model_path,
                             run_id=wandb.run.id,
                             experiment_key=experiment.get_key())
        if not args.gcbc and not args.images:
            z_enc, z_plan = produce_cluster_fig(next(plotting_dataset),
                                                encoder,
                                                planner,
                                                TEST_DATA_PATHS[0],
                                                num_take=dl.batch_size // 4)

            #Comet
Example No. 20
class AKMCS:
    """
    Create AK-MCS model (Active Kriging - Monte Carlo Simulation) for reliability analysis

    Args:
        krigobj (object): Kriging object for AKMCS analysis
        akmcsInfo (dict): Dictionary that contains AKMCS model information.
            detail akmcsInfo:
                - akmcsInfo["init_samp"] (nparray): Initial Monte-Carlo population
                - akmcsInfo["maxupdate"] (int): Maximum number of update. Defaults to 120
                - akmcsInfo["problem"] (str): Type of case

    Returns:
        updatedX (nparray): updated samples.
        minUiter (nparray): minimum U value for each iteration
    """

    def __init__(self, krigobj, akmcsInfo):
        """
        Initialize akmcs

        Args:
            krigobj (object): Kriging object for AKMCS analysis
            akmcsInfo (dict): Dictionary that contains AKMCS model information.
                detail akmcsInfo:
                    - akmcsInfo["init_samp"] (nparray): Initial Monte-Carlo population
                    - akmcsInfo["maxupdate"] (int): Maximum number of update. Defaults to 120
                    - akmcsInfo["problem"] (str): Type of case
        """
        akmcsInfo = akmcsInfocheck(akmcsInfo)
        self.krigobj = krigobj
        self.akmcsInfo = akmcsInfo
        self.init_samp = akmcsInfo['init_samp']
        self.maxupdate = akmcsInfo['maxupdate']
        self.nsamp = np.size(self.init_samp, axis=0)
        self.Gx = np.zeros(shape=[self.nsamp,1])
        self.sigmaG = np.zeros(shape=[1,self.nsamp])
        self.stop_criteria = 100 #assign large number
        self.logging = None

    def run(self, autoupdate=True, disp=True, savedatato=None, logging=False, saveimageto=None, plotdatapos=None,
            plotdataneg=None, loggingAPIkey=None, logname=None, logworkspace=None):
        """
        Run AKMCS analysis

        Args:
            autoupdate (bool): Perform automatic update on design space or not. Default to True.
            disp (bool): Display progress or not. Default to True.
            savedatato (str): Filename to save update data. e.g.: 'filename.csv'

        Return:
             None
        """
        #logging
        if logging:
            if loggingAPIkey is None or logname is None or logworkspace is None:
                raise ValueError('Logging is turned on, APIkey, project and workspace must be specified.')
            self.logging = Experiment(api_key=loggingAPIkey,
                                    project_name=logname, workspace=logworkspace)
            if savedatato is not None:
                self.logging.set_name(savedatato)
            else:
                pass

        else:
            pass
        # Calculate Gx and SigmaG
        # Split init_samp to avoid memory error
        krig_initsamp = self.krigobj.KrigInfo['X']
        t1 = time.time()
        if self.nsamp < 10000:
            self.Gx,self.sigmaG = self.krigobj.predict(self.init_samp, ['pred','s'])
        else:
            run_times = int(np.ceil(self.nsamp/10000))
            for i in range(run_times):
                start = i * 10000
                stop = (i+1) * 10000
                if i != (run_times - 1):
                    self.Gx[start:stop, :], self.sigmaG[:,start:stop] = \
                        self.krigobj.predict(self.init_samp[start:stop, :], ['pred','s'])
                else:
                    self.Gx[start:, :], self.sigmaG[:,start:] = \
                        self.krigobj.predict(self.init_samp[start:, :], ['pred','s'])
        t2 = time.time()

        # Calculate probability of failure
        self.Pf = self.pfcalc()

        # Calculate learning function U
        self.lfucalc()
        self.stopcrit()
        self.updateX = np.array([self.xnew])
        self.minUiter = np.array([self.minU])
        if disp:
            print(f"Done iter no: 0, Pf: {self.Pf}, minU: {self.minU}")

        # Update samples automatically
        while autoupdate:
            labeladded = False
            for i in range(self.maxupdate):
                # Evaluate new samples and append into Kriging object information
                t = time.time()
                ynew = evaluate(self.xnew, type=self.akmcsInfo['problem'])
                self.krigobj.KrigInfo['y'] = np.vstack((self.krigobj.KrigInfo['y'],ynew))
                self.krigobj.KrigInfo['X'] = np.vstack((self.krigobj.KrigInfo['X'], self.xnew))
                self.krigobj.KrigInfo['nsamp'] += 1

                # standardize model and train updated kriging model
                t3 = time.time()
                self.krigobj.standardize()
                self.krigobj.train(disp=False)
                t4 = time.time()

                # Calculate Gx and SigmaG
                # Split init_samp to avoid memory error
                if self.nsamp < 10000:
                    self.Gx, self.sigmaG = self.krigobj.predict(self.init_samp, ['pred', 's'])
                else:
                    run_times = int(np.ceil(self.nsamp / 10000))
                    for ii in range(run_times):
                        start = ii * 10000
                        stop = (ii + 1) * 10000
                        if ii != (run_times - 1):
                            self.Gx[start:stop, :], self.sigmaG[:, start:stop] = \
                                self.krigobj.predict(self.init_samp[start:stop, :], ['pred', 's'])
                        else:
                            self.Gx[start:, :], self.sigmaG[:, start:] = \
                                self.krigobj.predict(self.init_samp[start:, :], ['pred', 's'])

                t5 = time.time()
                # Calculate Pf, COV and LFU
                self.Pf = self.pfcalc()
                self.cov = self.covpf()
                self.lfucalc()
                self.stopcrit()
                t6 = time.time()

                # Update variables
                self.updateX = np.vstack((self.updateX,self.xnew))
                self.minUiter = np.vstack((self.minUiter,self.minU))
                elapsed = time.time() - t
                if disp:
                    print(f"iter no: {i+1}, Pf: {self.Pf}, stopcrit: {self.stop_criteria}, time(s): {elapsed}, "
                          f"ynew: {ynew}")

                if logging:
                    self.logging.log_parameter('krigtype',self.krigobj.KrigInfo['type'])
                    outdict = {"Prob_fail":self.Pf,
                            "stopcrit":self.stop_criteria,
                            "time(s)":elapsed
                    }
                    self.logging.log_metrics(outdict,step=i+1)

                if savedatato is not None:
                    temparray = np.array([i,self.Pf,self.stop_criteria,elapsed])
                    if i == 0:
                        totaldata = temparray[:]
                    else:
                        totaldata = np.vstack((totaldata,temparray))
                    filename =  savedatato
                    np.savetxt(filename, totaldata, delimiter=',', header='iter,Pf,stopcrit,time(s)')
                else:
                    pass

                if saveimageto is not None:
                    imagefile = saveimageto + str(i) + ".PNG"
                    title = "Pf = " + str(self.Pf)
                    plt.figure(0, figsize=[10, 9])
                    if not labeladded:
                        plt.scatter(plotdatapos[:, 0], plotdatapos[:, 1], c='yellow', label='Feasible')
                        plt.scatter(plotdataneg[:, 0], plotdataneg[:, 1], c='cyan', label='Infeasible')
                        plt.scatter(krig_initsamp[:, 0], krig_initsamp[:, 1], c='red', label='Initial Kriging Population')
                        plt.scatter(self.updateX[:, 0], self.updateX[:, 1], s=75, c='black', marker='x', label='Update')
                        labeladded = True
                    else:
                        plt.scatter(plotdatapos[:, 0], plotdatapos[:, 1], c='yellow')
                        plt.scatter(plotdataneg[:, 0], plotdataneg[:, 1], c='cyan')
                        plt.scatter(krig_initsamp[:, 0], krig_initsamp[:, 1], c='red')
                        plt.scatter(self.updateX[:, 0], self.updateX[:, 1], s=75, c='black', marker='x')
                    plt.xlabel('X1', fontsize=18)
                    plt.ylabel('X2', fontsize=18)
                    plt.tick_params(axis='both', which='both', labelsize=16)
                    plt.legend(loc=1, prop={'size': 15})
                    plt.title(title,fontdict={'fontsize':20})
                    plt.savefig(imagefile, format='png')
                else:
                    pass

                # Break condition
                if self.stop_criteria <= 0.05 and i >= 15:
                    break
                else:
                    pass

            print(f"COV: {self.cov}")
            if self.cov <= 0.05:
                break
            else:
                pass
            break  # temporary break for debugging, delete/comment this line later

    def pfcalc(self):
        nGless = len([i for i in self.Gx if i <= 0])
        nsamp = np.size(self.init_samp, axis=0)
        Pf = nGless / nsamp
        return Pf

    def covpf(self):
        nmc = np.size(self.init_samp, axis=0)
        if self.Pf == 0:
            cov = 1000
        else:
            cov = np.sqrt((1 - self.Pf) / (self.Pf * nmc))
        return cov

    def lfucalc(self):
        self.U = abs(self.Gx) / self.sigmaG.reshape(-1,1)
        self.minU = np.min(self.U)
        minUloc = np.argmin(self.U)
        self.xnew = self.init_samp[minUloc,:]

    def stopcrit(self):
        nsamp = np.size(self.init_samp, axis=0)
        temp1 = self.Gx - 1.96 * self.sigmaG.reshape(-1,1)
        temp2 = self.Gx + 1.96 * self.sigmaG.reshape(-1,1)
        pfp = len([i for i in temp1 if i <= 0])/nsamp
        pfn = len([i for i in temp2 if i <= 0])/nsamp
        pf0 = len([i for i in self.Gx if i <= 0])/nsamp
        if pf0 == 0:
            self.stop_criteria = 100
        else:
            self.stop_criteria = (pfp-pfn)/pf0
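
Isolating just the Comet calls made inside run() above, the logging amounts to something like the following sketch (all names and values are placeholders):

logging_exp = Experiment(api_key="<api-key>", project_name="<project>", workspace="<workspace>")
logging_exp.set_name("akmcs_run.csv")
logging_exp.log_parameter('krigtype', 'kriging')
logging_exp.log_metrics({"Prob_fail": 0.012, "stopcrit": 0.04, "time(s)": 1.7}, step=1)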
Example No. 21
class Logger:
    def __init__(self, send_logs, tags, parameters, experiment=None):
        self.stations = 5
        self.send_logs = send_logs
        if self.send_logs:
            if experiment is None:
                json_loc = glob.glob("./**/comet_token.json")[0]
                with open(json_loc, "r") as f:
                    kwargs = json.load(f)

                self.experiment = Experiment(**kwargs)
            else:
                self.experiment = experiment
        self.sent_mb = 0
        self.speed_window = deque(maxlen=100)
        self.step_time = None
        self.current_speed = 0
        if self.send_logs:
            if tags is not None:
                self.experiment.add_tags(tags)
            if parameters is not None:
                self.experiment.log_parameters(parameters)

    def begin_logging(self, episode_count, steps_per_ep, sigma, theta, step_time):
        self.step_time = step_time
        if self.send_logs:
            self.experiment.log_parameter("Episode count", episode_count)
            self.experiment.log_parameter("Steps per episode", steps_per_ep)
            self.experiment.log_parameter("theta", theta)
            self.experiment.log_parameter("sigma", sigma)

    def log_round(self, states, reward, cumulative_reward, info, loss, observations, step):
        self.experiment.log_histogram_3d(states, name="Observations", step=step)
        info = [[j for j in i.split("|")] for i in info]
        info = np.mean(np.array(info, dtype=np.float32), axis=0)
        try:
            # round_mb = np.mean([float(i.split("|")[0]) for i in info])
            round_mb = info[0]
        except Exception as e:
            print(info)
            print(reward)
            raise e
        self.speed_window.append(round_mb)
        self.current_speed = np.mean(np.asarray(self.speed_window)/self.step_time)
        self.sent_mb += round_mb
        # CW = np.mean([float(i.split("|")[1]) for i in info])
        CW = info[1]
        # stations = np.mean([float(i.split("|")[2]) for i in info])
        self.stations = info[2]
        fairness = info[3]

        if self.send_logs:
            self.experiment.log_metric("Round reward", np.mean(reward), step=step)
            self.experiment.log_metric("Per-ep reward", np.mean(cumulative_reward), step=step)
            self.experiment.log_metric("Megabytes sent", self.sent_mb, step=step)
            self.experiment.log_metric("Round megabytes sent", round_mb, step=step)
            self.experiment.log_metric("Chosen CW", CW, step=step)
            self.experiment.log_metric("Station count", self.stations, step=step)
            self.experiment.log_metric("Current throughput", self.current_speed, step=step)
            self.experiment.log_metric("Fairness index", fairness, step=step)

            for i, obs in enumerate(observations):
                self.experiment.log_metric(f"Observation {i}", obs, step=step)

            self.experiment.log_metrics(loss, step=step)

    def log_episode(self, cumulative_reward, speed, step):
        if self.send_logs:
            self.experiment.log_metric("Cumulative reward", cumulative_reward, step=step)
            self.experiment.log_metric("Speed", speed, step=step)

        self.sent_mb = 0
        self.last_speed = speed
        self.speed_window = deque(maxlen=100)
        self.current_speed = 0

    def end(self):
        if self.send_logs:
            self.experiment.end()
Example No. 22
def experiment():
    """Run finetuning label shift exp"""
    args = get_args(None)
    name = "Finetune {}:{}:{} {}:{} {}:{}:{}:{} {}{}v{}".format(
        args.dataset,
        args.dataset_cap,
        args.warmstart_ratio,
        args.shift_strategy,
        args.dirichlet_alpha,
        args.shift_correction,
        args.rlls_reg,
        args.rlls_lambda,
        args.lr,
        "IW " if args.train_iw else "NOIW ",
        "ITIW " if args.iterative_iw else "NOITIW ",
        args.version,
    )

    # Initialize comet.ml
    if args.log:
        comet_api = api.API(api_key=comet_ml_key)
        exps = comet_api.get_experiments(
            "ericzhao28",
            project_name="active-label-shift-adaptation",
            pattern=name)
        for exp in exps:
            if exp.get_name() == name:
                raise ValueError("EXP EXISTS!")
        logger = Experiment(comet_ml_key,
                            project_name="active-label-shift-adaptation")
        logger.set_name(name)
        logger.log_parameters(vars(args))

    # Seed the experiment
    seed = args.seed
    torch.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)
    print("Running seed ", seed)
    torch.cuda.set_device(args.device)
    assert torch.cuda.is_available()

    # Shuffle dataset
    dataset = get_datasets(args)
    dataset.label_ptrs(np.arange(dataset.online_len()))

    # Train h0
    net_cls = get_net_cls(args)
    network = net_cls(args.num_cls).to(args.device)

    def log_fn(epoch):
        network.eval()
        accuracy = evaluate(network,
                            dataset.iterate(args.infer_batch_size,
                                            False,
                                            split="test"),
                            args.device,
                            args,
                            label_weights=dataset.label_weights)
        print(accuracy)
        logger.log_metrics(accuracy, prefix="initial", step=epoch)

    train(network,
          dataset=dataset,
          epochs=args.initial_epochs,
          args=args,
          log_fn=log_fn)

    # Get source shift corrections
    lsmse = label_shift(network, dataset, args)

    def log_fn_shifted(epoch):
        network.eval()
        if args.iterative_iw:
            label_shift(network, dataset, args)
        accuracy = evaluate(network,
                            dataset.iterate(args.infer_batch_size,
                                            False,
                                            split="test"),
                            args.device,
                            args,
                            label_weights=dataset.label_weights)
        print(accuracy)
        logger.log_metrics(accuracy, prefix="shifted", step=epoch)

    train(network,
          dataset=dataset,
          epochs=args.initial_epochs,
          args=args,
          log_fn=log_fn_shifted)

    if args.iterative_iw:
        lsmse = label_shift(network, dataset, args)
    logger.log_metrics({"IW MSE": lsmse}, prefix="initial")
Ejemplo n.º 23
0
    if not os.path.exists('models'):
        os.mkdir('models')



    iteration = 0
    while iteration < int(iterations):
        for sample in tqdm(data_loader):
            images, labels = sample['images'].to(device), sample['labels'].to(device)
            logits = model(images)
            # collapse (batch, sequence) into one dimension so the labels line up with the logits
            labels = labels.view(int(bs) * int(seq_length))
            loss = criterion(logits, labels)
            optimizer.zero_grad()
            loss.backward()
            losses.update(loss.item(), images.size(0))
            optimizer.step()

            print('iteration: {}\tLoss: {loss.val:.4f} ({loss.avg:.4f})'.format(iteration, loss=losses))
            iteration += 1
            if iteration % it_save == 0:
                torch.save({'optimizer_state_dict': optimizer.state_dict(),
                            'model_state_dict': model.state_dict()}, 'models/swingnet_{}.pth.tar'.format(iteration))
            if iteration >= int(iterations):
                break

        experiment.log_metric("train_loss", losses.avg, step=iteration)
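As a side note on the two calls used throughout these examples: Comet's log_metric takes a single name/value pair, while log_metrics takes a dict of names to values. A self-contained sketch with placeholder numbers:

from comet_ml import Experiment

experiment = Experiment(project_name="api-demo", disabled=True)  # disabled=True: nothing is sent

# One scalar per call
experiment.log_metric("train_loss", 0.42, step=1)

# Several scalars at once
experiment.log_metrics({"train_loss": 0.42, "lr": 1e-4}, step=1)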
Ejemplo n.º 24
0
def train(config):
    experiment = Experiment(api_key="Q8LzfxMlAfA3ABWwq9fJDoR6r",
                            project_name="hotpot",
                            workspace="fan-luo")
    experiment.set_name(config.run_name)

    with open(config.word_emb_file, "r") as fh:
        word_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.char_emb_file, "r") as fh:
        char_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.dev_eval_file, "r") as fh:
        dev_eval_file = json.load(fh)
    with open(config.idx2word_file, 'r') as fh:
        idx2word_dict = json.load(fh)

    config.save = '{}-{}'.format(config.save, time.strftime("%Y%m%d-%H%M%S"))
    create_exp_dir(
        config.save,
        scripts_to_save=['run.py', 'model.py', 'util.py', 'sp_model.py'])

    def logging(s, print_=True, log_=True):
        if print_:
            print(s)
        if log_:
            with open(os.path.join(config.save, 'log.txt'), 'a+') as f_log:
                f_log.write(s + '\n')

    logging('Config')
    for k, v in config.__dict__.items():
        logging('    - {} : {}'.format(k, v))

    logging("Building model...")
    train_buckets = get_buckets(config.train_record_file)
    dev_buckets = get_buckets(config.dev_record_file)

    def build_train_iterator():
        return DataIterator(train_buckets, config.batch_size,
                            config.para_limit, config.ques_limit,
                            config.char_limit, True, config.sent_limit)

    def build_dev_iterator():
        return DataIterator(dev_buckets, config.batch_size, config.para_limit,
                            config.ques_limit, config.char_limit, False,
                            config.sent_limit)

    if config.sp_lambda > 0:
        model = SPModel(config, word_mat, char_mat)
    else:
        model = Model(config, word_mat, char_mat)

    logging('nparams {}'.format(
        sum([p.nelement() for p in model.parameters() if p.requires_grad])))
    ori_model = model.cuda()
    model = nn.DataParallel(ori_model)
    print("next(model.parameters()).is_cuda: " +
          str(next(model.parameters()).is_cuda))
    print("next(ori_model.parameters()).is_cuda: " +
          str(next(ori_model.parameters()).is_cuda))

    lr = config.init_lr
    optimizer = optim.SGD(filter(lambda p: p.requires_grad,
                                 model.parameters()),
                          lr=config.init_lr)
    cur_patience = 0
    total_loss = 0
    total_ans_loss = 0
    total_sp_loss = 0
    global_step = 0
    best_dev_F1 = None
    stop_train = False
    start_time = time.time()
    eval_start_time = time.time()
    model.train()

    for epoch in range(10000):
        for data in build_train_iterator():
            context_idxs = Variable(data['context_idxs'])
            ques_idxs = Variable(data['ques_idxs'])
            context_char_idxs = Variable(data['context_char_idxs'])
            ques_char_idxs = Variable(data['ques_char_idxs'])
            context_lens = Variable(data['context_lens'])
            y1 = Variable(data['y1'])
            y2 = Variable(data['y2'])
            q_type = Variable(data['q_type'])
            is_support = Variable(data['is_support'])
            start_mapping = Variable(data['start_mapping'])
            end_mapping = Variable(data['end_mapping'])
            all_mapping = Variable(data['all_mapping'])

            logit1, logit2, predict_type, predict_support = model(
                context_idxs,
                ques_idxs,
                context_char_idxs,
                ques_char_idxs,
                context_lens,
                start_mapping,
                end_mapping,
                all_mapping,
                return_yp=False)
            loss_1 = (nll_sum(predict_type, q_type) + nll_sum(logit1, y1) +
                      nll_sum(logit2, y2)) / context_idxs.size(0)
            loss_2 = nll_average(predict_support.view(-1, 2),
                                 is_support.view(-1))
            loss = loss_1 + config.sp_lambda * loss_2

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_loss += loss.data[0]
            total_ans_loss += loss_1.data[0]
            total_sp_loss += loss_2.data[0]
            global_step += 1

            if global_step % config.period == 0:
                cur_loss = total_loss / config.period
                cur_ans_loss = total_ans_loss / config.period
                cur_sp_loss = total_sp_loss / config.period
                elapsed = time.time() - start_time
                logging(
                    '| epoch {:3d} | step {:6d} | lr {:05.5f} | ms/batch {:5.2f} | train loss {:8.3f} | answer loss {:8.3f} | supporting facts loss {:8.3f} '
                    .format(epoch, global_step, lr,
                            elapsed * 1000 / config.period, cur_loss,
                            cur_ans_loss, cur_sp_loss))
                experiment.log_metrics(
                    {
                        'train loss': cur_loss,
                        'train answer loss': cur_ans_loss,
                        'train supporting facts loss': cur_sp_loss
                    },
                    step=global_step)
                total_loss = 0
                total_ans_loss = 0
                total_sp_loss = 0
                start_time = time.time()

            if global_step % config.checkpoint == 0:
                model.eval()
                metrics = evaluate_batch(build_dev_iterator(), model, 0,
                                         dev_eval_file, config)
                model.train()

                logging('-' * 89)
                logging(
                    '| eval {:6d} in epoch {:3d} | time: {:5.2f}s | dev loss {:8.3f} | answer loss {:8.3f} | supporting facts loss {:8.3f} | EM {:.4f} | F1 {:.4f}'
                    .format(global_step // config.checkpoint, epoch,
                            time.time() - eval_start_time, metrics['loss'],
                            metrics['ans_loss'], metrics['sp_loss'],
                            metrics['exact_match'], metrics['f1']))
                logging('-' * 89)
                experiment.log_metrics(
                    {
                        'dev loss': metrics['loss'],
                        'dev answer loss': metrics['ans_loss'],
                        'dev supporting facts loss': metrics['sp_loss'],
                        'EM': metrics['exact_match'],
                        'F1': metrics['f1']
                    },
                    step=global_step)

                eval_start_time = time.time()

                dev_F1 = metrics['f1']
                if best_dev_F1 is None or dev_F1 > best_dev_F1:
                    best_dev_F1 = dev_F1
                    torch.save(ori_model.state_dict(),
                               os.path.join(config.save, 'model.pt'))
                    cur_patience = 0
                else:
                    cur_patience += 1
                    if cur_patience >= config.patience:
                        lr /= 2.0
                        for param_group in optimizer.param_groups:
                            param_group['lr'] = lr
                        if lr < config.init_lr * 1e-2:
                            stop_train = True
                            break
                        cur_patience = 0
        if stop_train: break
    logging('best_dev_F1 {}'.format(best_dev_F1))
Ejemplo n.º 25
0
logger.info("Get prediction...")
y_pred = clf.predict(X_test_scaled)

print("\nResults\nConfusion matrix \n {}".format(confusion_matrix(y_test, y_pred)))

f1 = f1_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)

print("F1 score is {:6.3f}".format(f1))
print("Precision score is {:6.3f}".format(precision))
print("Recall score is {:6.3f}".format(recall))

# these will be logged to your sklearn-demos project on Comet.ml
params = {
    "random_state": random_state,
    "model_type": "logreg",
    "scaler": "standard scaler",
    "param_grid": str(param_grid),
    "stratify": True,
}

metrics = {"f1": f1, "recall": recall, "precision": precision}

logger.info("Logging params, meta info to commet.ml ...")
exp.log_dataset_hash(X_train_scaled)
exp.log_parameters(params)
exp.log_metrics(metrics)

logger.info("Done...")
Ejemplo n.º 26
0
class Reptile(Task):
    """
    A meta-learning task that teaches an agent over a set of other tasks
    """
    def __init__(self,
                 data_handler,
                 load_key=None,
                 sender=True,
                 receiver=True,
                 image_captioner=True,
                 image_selector=False,
                 track_results=True):
        self.sess = Agent.sess
        self.N = 1  # number of steps taken for each task - should be > 1

        self.S = SenderAgent()
        self.R = ReceiverAgent(*self.S.get_output())
        self.IC = ImageCaptioner()
        # self.IS = ImageSelector()

        self.S.all_agents_initialized(load_key)
        self.R.all_agents_initialized(load_key)

        self.train_metrics = {}
        self.val_metrics = {}
        self.experiment = Experiment(api_key='1jl4lQOnJsVdZR6oekS6WO5FI',
                                     project_name='Reptile',
                                     auto_param_logging=False,
                                     auto_metric_logging=False,
                                     disabled=(not track_results))

        self.params = {}
        self.params.update(Agent.get_params())
        self.params.update(data_handler.get_params())
        self.experiment.log_parameters(self.params)

        self.T = {}
        if image_captioner:
            self.ic = ImageCaptioning(self.IC,
                                      experiment=self.experiment,
                                      track_results=False)
            self.T["Image Captioner"] = lambda img, capts: self.ic.train_batch(
                (img, capts), mode="train")
        if image_selector:
            self.is_ = ImageSelection(self.IS,
                                      experiment=self.experiment,
                                      track_results=False)
            self.T["Image Selector"] = lambda img, capts: self.is_.train_batch(
                (img, capts), mode="train")
        if sender or receiver:
            self.rg = ReferentialGame(self.S,
                                      self.R,
                                      experiment=self.experiment,
                                      track_results=False)
            if receiver:
                self.T["Receiver"] = lambda img, capts: self.rg.train_batch(
                    img, mode="receiver_train")
            if sender:
                self.T["Sender"] = lambda img, capts: self.rg.train_batch(
                    img, mode="sender_train")

        # Initialize TF
        variables_to_initialize = tf.global_variables()
        if load_key is not None:
            dont_initialize = []
            if SenderAgent.loaded:
                dont_initialize += SenderAgent.get_all_weights()
            if ReceiverAgent.loaded:
                dont_initialize += ReceiverAgent.get_all_weights()
            if ImageCaptioner.loaded:
                dont_initialize += ImageCaptioner.get_all_weights()
            variables_to_initialize = [
                v for v in tf.global_variables() if v not in dont_initialize
            ]
            # REMOVE LATER
            #variables_to_initialize += ImageCaptioner.optimizer.variables()
        Agent.sess.run(tf.variables_initializer(variables_to_initialize))

        self.sender_shared_state = VariableState(
            self.sess, SenderAgent.get_shared_weights())
        self.receiver_shared_state = VariableState(
            self.sess, ReceiverAgent.get_shared_weights())
        self.sender_own_state = VariableState(self.sess,
                                              SenderAgent.get_weights())
        self.receiver_own_state = VariableState(self.sess,
                                                ReceiverAgent.get_weights())

        # print(SenderAgent.get_shared_weights())
        # print(ReceiverAgent.get_shared_weights())
        # print(SenderAgent.get_weights())
        # print(ReceiverAgent.get_weights())
        # print(tf.trainable_variables())

        self.shared_states = {
            "shared_sender": self.sender_shared_state,
            "shared_receiver": self.receiver_shared_state
        }
        self.own_states = {
            "own_sender": self.sender_own_state,
            "own_receiver": self.receiver_own_state
        }

        shared_average = []
        for k, v in self.shared_states.items():
            shared_average.append(v.export_variables())

        shared_average = np.mean(shared_average, axis=0)
        self.set_weights(new_shared_weights=shared_average)

        self.dh = data_handler
        with open(
                "{}/data/csv_loss_{}.csv".format(project_path,
                                                 self.experiment.get_key()),
                'w+') as csv_loss_file:
            csv_loss_file.write(
                "Image Captioner Loss,Image Selector Loss,Sender Loss,Receiver Loss\n"
            )
        with open(
                "{}/data/csv_accuracy_{}.csv".format(
                    project_path, self.experiment.get_key()),
                'w+') as csv_acc_file:
            csv_acc_file.write(
                "Image Captioner Loss,Image Selector Loss,Sender Loss,Receiver Loss\n"
            )

        self.step = 0

    def get_diff(self, a, b):
        diff = 0.
        if isinstance(a, (np.ndarray, np.generic)):
            return np.sum(np.abs(a - b))

        elif isinstance(a, list):
            for i in range(len(a)):
                diff += self.get_diff(a[i], b[i])

        elif isinstance(a, dict):
            for k in a:
                diff += self.get_diff(a[k], b[k])

        return diff

    def set_weights(self, new_own_weights=None, new_shared_weights=None):
        if new_own_weights is not None:
            for k, s in self.own_states.items():
                s.import_variables(new_own_weights[k])
        if new_shared_weights is not None:
            for k, s in self.shared_states.items():
                s.import_variables(new_shared_weights)

    def train_epoch(self, e, mode=None):
        self.dh.set_params(distractors=0)
        image_gen = self.dh.get_images(return_captions=True, mode="train")
        # Get current variables
        start_vars = {
            k: s.export_variables()
            for k, s in self.own_states.items()
        }
        start_vars["shared"] = self.shared_states[
            "shared_sender"].export_variables()

        while True:
            try:

                # Save current variables
                old_own = {
                    k: s.export_variables()
                    for k, s in self.own_states.items()
                }
                new_own = {k: [] for k, s in self.own_states.items()}
                old_shared = self.shared_states[
                    "shared_sender"].export_variables()
                new_shared = []

                # For each task
                for task in ["Image Captioner", "Sender", "Receiver"]:
                    # parameter setup to not waste data
                    if task in ["Sender", "Receiver", "Image Selector"]:
                        self.dh.set_params(distractors=Agent.D)
                    else:
                        self.dh.set_params(distractors=0)
                    # Run task n times
                    for _ in range(self.N):
                        images, captions = next(image_gen)
                        acc, loss = self.T[task](images, captions)
                    self.train_metrics[task + " Accuracy"] = acc
                    self.train_metrics[task + " Loss"] = loss

                    # Store new variables
                    for k, s in self.own_states.items():
                        new_own[k].append(s.export_variables())
                    for k, s in self.shared_states.items():
                        new_shared.append(s.export_variables())

                    # Reset to old variables for next task
                    for k, s in self.own_states.items():
                        s.import_variables(old_own[k])
                    for k, s in self.shared_states.items():
                        s.import_variables(old_shared)

                self.step += 1
                self.experiment.set_step(self.step)
                self.experiment.log_metrics(self.train_metrics)
                # Average new variables
                new_own = {
                    k: interpolate_vars(old_own[k], average_vars(new_own[k]),
                                        0.2)
                    for k, s in self.own_states.items()
                }
                new_shared = interpolate_vars(old_shared,
                                              average_vars(new_shared), 0.2)
                # Set variables to new variables
                self.set_weights(new_own_weights=new_own,
                                 new_shared_weights=new_shared)

            except StopIteration:
                break

        # Get change in weights
        end_vars = {
            k: s.export_variables()
            for k, s in self.own_states.items()
        }
        end_vars["shared"] = self.shared_states[
            "shared_sender"].export_variables()
        weight_diff = self.get_diff(start_vars, end_vars)

        #self.experiment.set_step(e)
        self.val_metrics["Weight Change"] = weight_diff
        self.experiment.log_metrics(self.val_metrics)

        # Log data to a csv
        with open("{}/data/csv_loss_{}.csv".format(project_path, self.experiment.get_key()), 'a') as csv_loss_file, \
             open("{}/data/csv_accuracy_{}.csv".format(project_path, self.experiment.get_key()), 'a') as csv_acc_file:
            losses = []
            accs = []
            for task in ["Image Captioner", "Sender", "Receiver"]:
                losses.append(str(self.train_metrics[task + " Loss"]))
                accs.append(str(self.train_metrics[task + " Accuracy"]))

            csv_loss_file.write(",".join(losses))
            csv_loss_file.write("\n")

            csv_acc_file.write(",".join(accs))
            csv_acc_file.write("\n")

        return 0, weight_diff
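The meta-update above hinges on average_vars and interpolate_vars with an interpolation rate of 0.2. Those helpers are not shown in the snippet; the NumPy sketch below is an assumption about their behaviour, illustrating the Reptile-style step of moving the meta-weights a fraction of the way toward the average of the per-task weights:

import numpy as np

def average_vars(var_sets):
    # element-wise mean across the per-task weight lists (assumed implementation)
    return [np.mean(vs, axis=0) for vs in zip(*var_sets)]

def interpolate_vars(old, new, epsilon):
    # Reptile step: move each meta-weight a fraction epsilon toward the task average (assumed)
    return [o + epsilon * (n - o) for o, n in zip(old, new)]

old = [np.zeros(3)]
per_task = [[np.ones(3)], [np.full(3, 3.0)]]
print(interpolate_vars(old, average_vars(per_task), 0.2))  # [array([0.4, 0.4, 0.4])]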
Ejemplo n.º 27
0
        D_G_z2 = output.mean().item()
        # Update G
        optimizerG.step()

        """
        Loss_D - discriminator loss calculated as the sum of losses for the all real and all fake batches (log(D(x))+log(D(G(z)))).
        Loss_G - generator loss calculated as log(D(G(z)))
        D(x) - the average output (across the batch) of the discriminator for the all real batch.
               This should start close to 1 then theoretically converge to 0.5 when G gets better. Think about why this is.
        D(G(z)) - average discriminator outputs for the all fake batch. The first number is before D is updated and the second number
                  is after D is updated. These numbers should start near 0 and converge to 0.5 as G gets better. Think about why this is.
        """

        # Output training stats
        if i % 10 == 0:
            print('step:', steps, ' epoch:', epoch)
            experiment.log_metrics({'Loss_D': errD.item(), 'Loss_G': errG.item(), 'D(x)': D_x, 'D(G(z1))': D_G_z1, 'D(G(z2))': D_G_z2})

        if (steps % 100 == 0) or ((epoch == num_epochs-1) and (i == len(dataloader)-1)):
            fixed_noise = torch.randn(3, nz, 1, 1, device=device)
            with torch.no_grad():
                fake = netG(fixed_noise).detach().cpu().numpy()

                plot = plt.figure(figsize=(20, 10))
                for m in range(3):
                    plt.subplot(1, 3, m + 1)
                    plt.imshow((fake[m].transpose((1, 2, 0))+1)/2)
                experiment.log_figure(figure_name='epoch_{}_{}'.format(epoch, steps))
                plt.close()

        steps += 1
Ejemplo n.º 28
0
            # person and ball metrics
            metrics = {
                'val_precision': precision.mean(),
                'val_precision_person': precision[0].mean(),
                'val_precision_ball': precision[1].mean(),
                'val_recall': recall.mean(),
                'val_recall_person': recall[0].mean(),
                'val_recall_ball': recall[1].mean(),
                'val_mAP': AP.mean(),
                'val_AP_person': AP[0].mean(),
                'val_AP_ball': AP[1].mean(),
                'val_f1': f1.mean()
            }

            experiment.log_metrics(metrics, step=epoch)

        # if epoch % opt.checkpoint_interval == 0:
        #     torch.save(model.state_dict(), f"checkpoints/yolov3_ckpt_%d.pth" % epoch)
        #     print("Save model: ",  f"checkpoints/yolov3_ckpt_%d.pth" % epoch)

        # plot image
        # if epoch % 10 == 0:
        #     model.eval()
        # cv2_img = cv2.imread("data/obj/20191201F-netvsYSCC_00049.jpg")
        # cv2_img = cv2.cvtColor(cv2_img, cv2.COLOR_BGR2RGB)
        # img = Image.open("data/obj/20191201F-netvsYSCC_00049.jpg")
        # demo_img = transforms.ToTensor()(Image.open("data/obj/20191201F-netvsYSCC_00049.jpg").convert('RGB'))
        # demo_diff = transforms.ToTensor()(Image.open("data/difference/20191201F-netvsYSCC/20191201F-netvsYSCC_00049.jpg").convert('L'))

        # demo_img = demo_img * demo_diff + demo_img
Ejemplo n.º 29
0
def eval(
    model: LiSSModel,
    dataset: UnalignedDataset,
    exp: Experiment,
    total_iters: int = 0,
    nb_ims: int = 30,
):
    liss = model.opt.model == "liss"
    metrics = {}
    print(f"----------- Evaluation {total_iters} ----------")
    with torch.no_grad():
        data = {
            "translation": {
                "A": {
                    "rec": None,
                    "idt": None,
                    "real": None,
                    "fake": None
                },
                "B": {
                    "rec": None,
                    "idt": None,
                    "real": None,
                    "fake": None
                },
            }
        }
        force = set(["identity", "translation"])
        if liss:
            for t in model.tasks:
                tmp = {}
                if t.eval_visuals_pred or t.log_type == "acc":
                    tmp["pred"] = None
                if t.eval_visuals_target or t.log_type == "acc":
                    tmp["target"] = None
                data[t.key] = {domain: deepcopy(tmp) for domain in "AB"}

            force |= set(model.tasks.keys)

        losses = {
            k: []
            for k in dir(model) if k.startswith("loss_")
            and isinstance(getattr(model, k), torch.Tensor)
        }
        for i, b in enumerate(dataset):
            # print(f"\rEval batch {i}", end="")

            model.set_input(b)
            model.forward(force=force)
            model.backward_G(losses_only=True, force=force)

            for k in dir(model):
                if k.startswith("loss_") and isinstance(
                        getattr(model, k), torch.Tensor):
                    if k not in losses:
                        losses[k] = []
                    losses[k].append(getattr(model, k).detach().cpu().item())

            if liss:

                for t in model.tasks:
                    for domain in "AB":
                        for dtype in data[t.key][domain]:
                            if (t.log_type != "acc"
                                    and data[t.key][domain][dtype] is not None
                                    and
                                    len(data[t.key][domain][dtype]) >= nb_ims):
                                continue

                            v = model.get(
                                f"{domain}_{t.key}_{dtype}").detach().cpu()
                            if data[t.key][domain][dtype] is None:
                                data[t.key][domain][dtype] = v
                            else:
                                data[t.key][domain][dtype] = torch.cat(
                                    [data[t.key][domain][dtype], v],
                                    dim=0,
                                )

            # -------------------------
            # -----  Translation  -----
            # -------------------------
            if (data["translation"]["A"]["real"] is None
                    or len(data["translation"]["A"]["real"]) < nb_ims):
                for domain in "AB":
                    for dtype in ["real", "fake", "rec", "idt"]:
                        dom = domain
                        if dtype in {"fake", "idt"}:
                            dom = swap_domain(domain)
                        v = model.get(f"{dom}_{dtype}").detach().cpu()
                        if data["translation"][domain][dtype] is None:
                            data["translation"][domain][dtype] = v
                        else:
                            data["translation"][domain][dtype] = torch.cat(
                                [data["translation"][domain][dtype], v], dim=0)
                        # print(
                        #     f"{domain} {dtype} {len(data['translation'][domain][dtype])}"
                        # )

    for task in data:
        if task != "translation" and model.tasks[task].log_type != "vis":
            continue
        for domain in data[task]:
            for i, v in data[task][domain].items():
                data[task][domain][i] = torch.cat(list(v[:nb_ims].permute(
                    0, 2, 3, 1)),
                                                  axis=1)

    log_images = int(data["translation"]["A"]["real"].shape[1] /
                     data["translation"]["A"]["real"].shape[0])
    im_size = data["translation"]["A"]["real"].shape[0]

    ims = {"A": None, "B": None}

    data_keys = ["translation"]
    translation_keys = ["real", "fake", "rec", "idt"]
    data_keys += [task for task in data if task not in data_keys]

    for task in data_keys:
        if task != "translation" and model.tasks[task].log_type != "vis":
            continue
        for domain in "AB":
            im_types = (translation_keys if task == "translation" else list(
                data[task][domain].keys()))
            for im_type in im_types:
                v = data[task][domain][im_type].float()
                if task == "depth":
                    v = to_min1_1(v)
                    v = v.repeat((1, 1, 3))
                v = v + 1
                v = v / 2
                if ims[domain] is None:
                    ims[domain] = v
                else:
                    ims[domain] = torch.cat([ims[domain], v], dim=0)

    # ------------------------
    # -----  Comet Logs  -----
    # ------------------------
    for i in range(0, log_images, 5):
        k = i + 5
        exp.log_image(
            ims["A"][:, i * im_size:k * im_size, :].numpy(),
            "test_A_{}_{}_rfcidg".format(i * 5, (i + 1) * 5 - 1),
            step=total_iters,
        )
        exp.log_image(
            ims["B"][:, i * im_size:k * im_size, :].numpy(),
            "test_B_{}_{}_rfcidg".format(i * 5, (i + 1) * 5 - 1),
            step=total_iters,
        )
    if liss:
        test_losses = {
            "test_" + ln: np.mean(losses["loss_" + ln])
            for t in model.tasks for ln in t.loss_names
        }

        test_accs = {
            f"test_G_{domain}_{t.key}_acc":
            np.mean(data[t.key][domain]["pred"].max(-1)[1].numpy() == data[
                t.key][domain]["target"].numpy())
            for domain in "AB" for t in model.tasks if t.log_type == "acc"
        }

        if liss:
            exp.log_metrics(test_losses, step=total_iters)
            exp.log_metrics(test_accs, step=total_iters)

            for t in model.tasks:
                if t.log_type != "acc":
                    continue
                for domain in "AB":
                    target = data[t.key][domain]["target"].numpy()
                    pred = data[t.key][domain]["pred"].numpy()
                    exp.log_confusion_matrix(
                        get_one_hot(target, t.output_dim),
                        pred,
                        file_name=
                        f"confusion_{domain}_{t.key}_{total_iters}.json",
                        title=f"confusion_{domain}_{t.key}_{total_iters}.json",
                    )
            metrics = {k + "_loss": v for k, v in test_losses.items()}
            metrics.update(test_accs)
    print("----------- End Evaluation----------")
    return metrics
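The eval routine above relies on a few small helpers (swap_domain, to_min1_1, get_one_hot) that are not shown; the sketch below gives plausible implementations as assumptions about their behaviour, not the project's actual code:

import numpy as np

def swap_domain(domain):
    # "A" <-> "B": the fake/idt images for domain A come from the B side and vice versa (assumed)
    return "B" if domain == "A" else "A"

def to_min1_1(x):
    # rescale values to [-1, 1]; assumed helper matching the depth-visualisation branch above
    return 2 * (x - x.min()) / (x.max() - x.min()) - 1

def get_one_hot(targets, nb_classes):
    # one-hot encode integer class targets, as fed to log_confusion_matrix above
    return np.eye(nb_classes)[np.asarray(targets).reshape(-1)]

print(swap_domain("A"))                       # B
print(to_min1_1(np.array([0.0, 5.0, 10.0])))  # [-1.  0.  1.]
print(get_one_hot([0, 2, 1], 3))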
Ejemplo n.º 30
0
class _Experiment(object):
    def __init__(self, name):
        config_data = read_file_in_dir('./', name + '.json')

        if config_data is None:
            raise Exception("Configuration file doesn't exist: ", name)

        self.name = config_data['experiment_name']

        dataset_config = config_data['dataset']
        data_percentage = dataset_config['data_percentage']
        batch_size = dataset_config['batch_size']
        num_workers = dataset_config['num_workers']
        data_files = dataset_config['data_file_path']

        experiment_config = config_data['experiment']
        self.epochs = experiment_config['num_epochs']
        learning_rate = experiment_config['learning_rate']

        model_config = config_data['model']
        self.is_vae = model_config['is_vae']
        hidden_size = model_config['hidden_size']
        embedding_size = model_config['embedding_size']
        self.is_variational = model_config['is_variational']
        self.is_conditional = model_config['is_conditional']
        class_label = model_config['class_label']

        generation_config = config_data['generation']
        max_length = generation_config['max_length']
        prediction_type = ("Stochastic",
                           "Deterministic")[generation_config["deterministic"]]
        temperature = generation_config['temperature']

        self.experiment_dir = os.path.join(ROOT_STATS_DIR, self.name)
        self.log_comet = config_data['comet']['log_comet']

        if self.log_comet:
            self.experiment = Experiment(
                api_key="CaoCCUZVjE0gXKA3cbtMKSSKL",
                project_name="image-captioning-251b",
                workspace="keshav919",
            )

        # Load Datasets
        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
        self.train_loader, self.val_loader, self.test_loader = getDataloaders(
            data_files,
            max_length,
            batch_size,
            num_workers,
            tokenizer,
            data_percentage,
            class_label=class_label)

        # Setup Experiment
        self.current_epoch = 0
        self.training_losses = []
        self.bleu1_t = []  # Geeling: log bleu scores
        self.bleu4_t = []
        self.bleu1_v = []  # Keshav: log bleu scores
        self.bleu4_v = []
        self.val_losses = []
        self.best_loss = float('inf')
        self.best_model = None  # Save your best model in this field and use this in test method.

        if config_data['generation']:
            tags = [self.name, self.is_variational, prediction_type]
            hyper_params = {
                "Epochs": self.epochs,
                "Batch Size": batch_size,
                "Learning Rate": learning_rate,
                "Hidden Size": hidden_size,
                "Embedding Size": embedding_size,
                "Max Length": max_length,
                "Temperature": temperature
            }

            if self.log_comet:
                self.experiment.add_tags(tags)
                self.experiment.log_parameters(hyper_params)

        # Initialize Model
        self.tokenizer = tokenizer
        self.vocab_size = tokenizer.vocab_size
        self.model = getModel(config_data, self.vocab_size)

        # TODO: need to add KL divergence
        self.criterion = torch.nn.CrossEntropyLoss()
        self.optimizer = optim.Adam(self.model.parameters(), lr=learning_rate)

        self.init_model()

        self.load_experiment()  # Load Experiment Data if available

    def load_experiment(self):
        """
        Loads the experiment data if exists to resume training from last saved checkpoint.
        """
        os.makedirs(ROOT_STATS_DIR, exist_ok=True)

        # Since we use comet, all our metrics are logged there rather than these directories.
        # Create the dir just for test output
        os.makedirs(self.experiment_dir, exist_ok=True)

    def init_model(self):
        """
            Gets GPU ready to use
        """
        self.device = torch.device(
            "cuda" if torch.cuda.is_available() else "cpu")
        self.model.to(self.device)
        self.criterion.to(self.device)

    def loss_function(self, raw_outputs, hypothesis, mu, logvar):
        ceLoss = self.criterion(raw_outputs, hypothesis)
        if self.is_variational:
            # KL divergence to the unit Gaussian prior: -0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
            klLoss = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
            klLoss /= len(raw_outputs)

            ceLoss += klLoss

        return ceLoss

    def run(self):
        start_epoch = self.current_epoch
        for epoch in range(
                start_epoch,
                self.epochs):  # loop over the dataset multiple times
            start_time = datetime.now()
            self.current_epoch = epoch

            if self.is_vae:
                ########################## VAE ##############################
                train_loss, bleu1_scores_t, bleu4_scores_t = self.train_vae()
                if self.log_comet:
                    self.experiment.log_metrics({'Train_Loss': train_loss},
                                                epoch=epoch)
                    self.experiment.log_metrics(
                        {'Train_Metric/BLEU-1': bleu1_scores_t}, epoch=epoch)
                    self.experiment.log_metrics(
                        {'Train_Metric/BLEU-4': bleu4_scores_t}, epoch=epoch)

                val_loss, bleu1_scores_v, bleu4_scores_v = self.val_vae()
                if self.log_comet:
                    self.experiment.log_metrics({'Val_Loss': val_loss},
                                                epoch=epoch)
                    self.experiment.log_metrics(
                        {'Val_Metric/BLEU-1': bleu1_scores_v}, epoch=epoch)
                    self.experiment.log_metrics(
                        {'Val_Metric/BLEU-4': bleu4_scores_v}, epoch=epoch)
            else:
                ########################## BERT ##############################
                train_loss, train_accu = self.train_bert()
                if self.log_comet:
                    self.experiment.log_metrics({'Train_Loss': train_loss},
                                                epoch=epoch)
                    self.experiment.log_metrics({'Train_Accu': train_accu},
                                                epoch=epoch)

                val_loss, val_accu = self.val_bert()
                if self.log_comet:
                    self.experiment.log_metrics({'Val_Loss': val_loss},
                                                epoch=epoch)
                    self.experiment.log_metrics({'Val_Accu': val_accu},
                                                epoch=epoch)

            # Early stopping
            if val_loss < self.best_loss:
                self.best_loss = val_loss
                torch.save(self.model, './saved_models/{}'.format(self.name))

        if self.is_vae:
            self.test_vae()
        else:
            self.test_bert()

########################## VAE ##############################

    def train_vae(self):
        self.model.train()
        training_loss = 0
        bleu1_scores = 0.0
        bleu4_scores = 0.0
        print_iter = 150
        if_counter = 0

        for i, (prem, hyp, _, _, lab) in enumerate(self.train_loader):
            self.model.zero_grad()

            prem = prem.long().to(self.device)
            hyp = hyp.long().to(self.device)
            lab = lab.to(self.device)

            # Forward pass
            preds, raw_outputs, mu, logvar = self.model(
                prem,
                hyp,
                lab,
                self.device,
                is_teacher_forcing_on=True,
                skip_generation=i % print_iter != 0,
                is_conditional=self.is_conditional)

            # Calculate loss and perform backprop
            loss = self.loss_function(raw_outputs[:, 1:].permute(0, 2, 1),
                                      hyp[:, 1:], mu, logvar)
            loss.backward()
            self.optimizer.step()

            # Log the training loss
            training_loss += loss.item()

            # View deterministic predictions
            if i % print_iter == 0:
                if_counter += 1

                # Get the sentence without the <start> and <end> and other tags
                clean_premise_text, _ = clean_caption(prem, self.tokenizer)
                clean_preds_text, _ = clean_caption(preds, self.tokenizer)
                clean_targets_text, _ = clean_caption(hyp, self.tokenizer)

                # Calculate bleu scores
                b1 = calculate_bleu(bleu1, clean_preds_text,
                                    clean_targets_text)
                b4 = calculate_bleu(bleu4, clean_preds_text,
                                    clean_targets_text)
                bleu1_scores += (b1 / len(clean_preds_text))
                bleu4_scores += (b4 / len(clean_preds_text))

                print(self.current_epoch, i, ": ------ TRAIN ------")
                print("------ Actual Premise ------")
                print(clean_premise_text[0])
                print("------ Actual Hypothesis ------")
                print(clean_targets_text[0])
                print("------ Predicted Hypothesis ------")
                print(clean_preds_text[0])
                print()

        return training_loss / (
            i + 1), bleu1_scores / if_counter, bleu4_scores / if_counter

    def val_vae(self):
        self.model.eval()
        val_loss = 0
        bleu1_scores = 0.0
        bleu4_scores = 0.0
        print_iter = 150
        if_counter = 0

        with torch.no_grad():
            for i, (prem, hyp, _, _, lab) in enumerate(self.val_loader):
                prem = prem.long().to(self.device)
                hyp = hyp.long().to(self.device)
                lab = lab.to(self.device)

                # Forward pass
                preds, raw_outputs, mu, logvar = self.model(
                    prem,
                    hyp,
                    lab,
                    self.device,
                    is_teacher_forcing_on=True,
                    skip_generation=i % print_iter != 0,
                    is_conditional=self.is_conditional)

                # Calculate loss (no backprop during validation)
                loss = self.loss_function(raw_outputs[:, 1:].permute(0, 2, 1),
                                          hyp[:, 1:], mu, logvar)

                # Accumulate the validation loss
                val_loss += loss.item()

                # View deterministic predictions
                if i % print_iter == 0:
                    if_counter += 1

                    # Get the sentence without the <start> and <end> and other tags
                    clean_premise_text, _ = clean_caption(prem, self.tokenizer)
                    clean_preds_text, _ = clean_caption(preds, self.tokenizer)
                    clean_targets_text, _ = clean_caption(hyp, self.tokenizer)

                    # Calculate bleu scores
                    b1 = calculate_bleu(bleu1, clean_preds_text,
                                        clean_targets_text)
                    b4 = calculate_bleu(bleu4, clean_preds_text,
                                        clean_targets_text)
                    bleu1_scores += (b1 / len(clean_preds_text))
                    bleu4_scores += (b4 / len(clean_preds_text))

                    print(self.current_epoch, i, ": ------ VALIDATION ------")
                    print("------ Actual Premise ------")
                    print(clean_premise_text[0])
                    print("------ Actual Hypothesis ------")
                    print(clean_targets_text[0])
                    print("------ Predicted Hypothesis ------")
                    print(clean_preds_text[0])
                    print()

        return val_loss / (
            i + 1), bleu1_scores / if_counter, bleu4_scores / if_counter

    def test_vae(self):
        self.model = torch.load('./saved_models/{}'.format(self.name))
        self.model.eval()
        test_loss = 0
        bleu1_scores = 0.0
        bleu4_scores = 0.0
        print_iter = 150

        predicted = []
        premises = []
        labels = []
        language = []

        with torch.no_grad():
            for i, (prem, hyp, _, _, lab) in enumerate(self.test_loader):
                prem = prem.long().to(self.device)
                hyp = hyp.long().to(self.device)
                lab = lab.to(self.device)

                # Forward pass
                _, raw_outputs, mu, logvar = self.model(
                    prem,
                    hyp,
                    lab,
                    self.device,
                    is_teacher_forcing_on=True,
                    is_conditional=self.is_conditional)
                preds, _, _, _ = self.model(prem,
                                            hyp,
                                            lab,
                                            self.device,
                                            is_teacher_forcing_on=False,
                                            is_conditional=self.is_conditional)

                # Calculate loss (no backprop at test time)
                loss = self.loss_function(raw_outputs[:, 1:].permute(0, 2, 1),
                                          hyp[:, 1:], mu, logvar)

                # Accumulate the test loss
                test_loss += loss.item()

                # Get the sentence without the <start> and <end> and other tags
                clean_premise_text, clean_premise_joined = clean_caption(
                    prem, self.tokenizer)
                clean_preds_text, clean_preds_joined = clean_caption(
                    preds, self.tokenizer)
                clean_targets_text, _ = clean_caption(hyp, self.tokenizer)

                predicted = predicted + clean_preds_joined
                premises = premises + clean_premise_joined
                labels = labels + lab.tolist()
                language = language + ['en'] * len(clean_premise_joined)

                # Calculate bleu scores
                b1 = calculate_bleu(bleu1, clean_preds_text,
                                    clean_targets_text)
                b4 = calculate_bleu(bleu4, clean_preds_text,
                                    clean_targets_text)
                bleu1_scores += (b1 / len(clean_preds_text))
                bleu4_scores += (b4 / len(clean_preds_text))

                if i % print_iter == 0:
                    print(i, ": ------ TEST ------")
                    print("------ Actual Premise ------")
                    print(clean_premise_text[0])
                    print("------ Actual Hypothesis ------")
                    print(clean_targets_text[0])
                    print("------ Predicted Hypothesis ------")
                    print(clean_preds_text[0])
                    print()

        bleu1_scores = bleu1_scores / (i + 1)
        bleu4_scores = bleu4_scores / (i + 1)
        test_loss = test_loss / (i + 1)

        result_str = "Test Performance: Loss: {}, Bleu1: {}, Bleu4: {}".format(
            test_loss, bleu1_scores, bleu4_scores)
        self.log(result_str)

        if self.log_comet and self.is_vae:
            self.experiment.log_metrics({'Test_Loss': test_loss})
            self.experiment.log_metrics({'Test_Metric/BLEU-1': bleu1_scores})
            self.experiment.log_metrics({'Test_Metric/BLEU-4': bleu4_scores})

        ds = {
            "premise": premises,
            "hypothesis": predicted,
            "label": labels,
            "lang_abv": language
        }
        df = pd.DataFrame(
            ds, columns=["premise", "hypothesis", "label", "lang_abv"])
        df.to_csv("predicted_vae" + self.name + ".csv", index=False)

        return test_loss, bleu1_scores, bleu4_scores

########################## BERT ##############################

    def train_bert(self):
        self.model.train()
        training_loss = 0
        print_iter = 50
        total_pred = 0
        correct_pred = 0

        for i, (prem_id, hyp_id, prem_att_mask, hypo_att_mask,
                lab) in enumerate(self.train_loader):
            self.model.zero_grad()

            # Push data to GPU
            # (the model itself is moved to the GPU in init_model)
            prem_id = prem_id.long().to(self.device)
            hyp_id = hyp_id.long().to(self.device)
            prem_att_mask = prem_att_mask.long().to(self.device)
            hypo_att_mask = hypo_att_mask.long().to(self.device)
            lab = lab.long().to(self.device)

            # Prepare data as inputs to bert
            input_ids = torch.cat((prem_id, hyp_id), dim=1).long()
            attention_mask = torch.cat((prem_att_mask, hypo_att_mask),
                                       dim=1).long()
            label = lab.unsqueeze(1).long()

            # Forward pass
            outputs = self.model(input_ids,
                                 attention_mask=attention_mask,
                                 labels=label)

            # Calculate loss and perform backprop
            loss = outputs.loss
            loss.backward()
            self.optimizer.step()

            # Log the training loss
            training_loss += loss.item()

            # Get predicted labels
            predicted = torch.argmax(outputs.logits, 1)

            # calculate training accuracy = correct predictions/total predictions
            for j in range(len(lab)):
                total_pred += 1
                if lab[j] == predicted[j]:
                    correct_pred += 1
            accu = correct_pred / total_pred

            # View deterministic predictions
            if i % print_iter == 0:
                print(self.current_epoch, i, ": ------ TRAIN ------")
                print("------ Actual Labels ------")
                print(lab)
                print("------ Predicted Labels ------")
                print(predicted)
                print("Current training accuracy: ", accu)

            # debugging code
            # if i==print_iter+1:
            #     print("i is: ", i)
            #     sys.exit()

        return training_loss / (i + 1), accu

    def val_bert(self):
        self.model.eval()
        val_loss = 0
        print_iter = 50
        total_pred = 0
        correct_pred = 0

        with torch.no_grad():
            for i, (prem_id, hyp_id, prem_att_mask, hypo_att_mask,
                    lab) in enumerate(self.val_loader):
                self.model.zero_grad()

                # Push data to GPU
                prem_id = prem_id.long().to(self.device)
                hyp_id = hyp_id.long().to(self.device)
                prem_att_mask = prem_att_mask.long().to(self.device)
                hypo_att_mask = hypo_att_mask.long().to(self.device)
                lab = lab.long().to(self.device)

                # Prepare data as inputs to bert
                input_ids = torch.cat((prem_id, hyp_id), dim=1)
                attention_mask = torch.cat((prem_att_mask, hypo_att_mask),
                                           dim=1)
                label = lab.unsqueeze(1)

                # Forward pass
                outputs = self.model(input_ids,
                                     attention_mask=attention_mask,
                                     labels=label)

                # Calculate loss (no backprop during validation)
                loss = outputs.loss

                # Log the val loss
                val_loss += loss.item()

                # Get predicted labels
                predicted = torch.argmax(outputs.logits, 1)

                # calculate val accuracy = correct predictions/total predictions
                for j in range(len(lab)):
                    total_pred += 1
                    if lab[j] == predicted[j]:
                        correct_pred += 1
                accu = correct_pred / total_pred

                # View deterministic predictions
                if i % print_iter == 0:
                    print(self.current_epoch, i, ": ------ VAL ------")
                    print("------ Actual Labels ------")
                    print(lab)
                    print("------ Predicted Labels ------")
                    print(predicted)
                    print("Current validation accuracy: ", accu)

        return val_loss / (i + 1), accu

    def test_bert(self):
        self.model.eval()
        test_loss = 0
        print_iter = 50
        total_pred = 0
        correct_pred = 0

        for i, (prem_id, hyp_id, prem_att_mask, hypo_att_mask,
                lab) in enumerate(self.test_loader):
            self.model.zero_grad()

            # Push data to GPU
            prem_id = prem_id.long().to(self.device)
            hyp_id = hyp_id.long().to(self.device)
            prem_att_mask = prem_att_mask.long().to(self.device)
            hypo_att_mask = hypo_att_mask.long().to(self.device)
            lab = lab.long().to(self.device)

            # Prepare data as inputs to bert
            input_ids = torch.cat((prem_id, hyp_id), dim=1)
            attention_mask = torch.cat((prem_att_mask, hypo_att_mask), dim=1)
            label = lab.unsqueeze(1)

            # Forward pass
            outputs = self.model(input_ids,
                                 attention_mask=attention_mask,
                                 labels=label)

            # Calculate loss (no backprop at test time)
            loss = outputs.loss

            # Log the test loss
            test_loss += loss.item()

            # Get predicted labels
            predicted = torch.argmax(outputs.logits, 1)

            # calculate test accuracy = correct predictions/total predictions
            for j in range(len(lab)):
                total_pred += 1
                if lab[j] == predicted[j]:
                    correct_pred += 1
            accu = correct_pred / total_pred

            # View deterministic predictions
            if i % print_iter == 0:
                print(self.current_epoch, i, ": ------ TEST ------")
                print("------ Actual Labels ------")
                print(lab)
                print("------ Predicted Labels ------")
                print(predicted)
                print("Current test accuracy: ", accu)

        if self.log_comet and (not self.is_vae):
            self.experiment.log_metrics({'Test_Loss': test_loss / (i + 1)})
            self.experiment.log_metrics({'Test_Accu': accu})

        return test_loss / (i + 1), accu

########################## Log ##############################

    def save_model(self):
        root_model_path = os.path.join(self.experiment_dir, 'latest_model.pt')
        model_dict = self.model.state_dict()
        state_dict = {
            'model': model_dict,
            'optimizer': self.optimizer.state_dict()
        }
        torch.save(state_dict, root_model_path)

    def __record_stats(self, train_loss, bleu1_t, bleu4_t, val_loss, bleu1_v,
                       bleu4_v):
        self.training_losses.append(train_loss)
        self.bleu1_t.append(bleu1_t)
        self.bleu4_t.append(bleu4_t)
        self.val_losses.append(val_loss)
        self.bleu1_v.append(bleu1_v)
        self.bleu4_v.append(bleu4_v)

        self.plot_stats()

        write_to_file_in_dir(self.experiment_dir, 'training_losses.txt',
                             self.training_losses)
        write_to_file_in_dir(self.experiment_dir, 'val_losses.txt',
                             self.val_losses)

        write_to_file_in_dir(self.experiment_dir, 'bleu1_t.txt', self.bleu1_t)
        write_to_file_in_dir(self.experiment_dir, 'bleu4_t.txt', self.bleu4_t)
        write_to_file_in_dir(self.experiment_dir, 'bleu1_v.txt', self.bleu1_v)
        write_to_file_in_dir(self.experiment_dir, 'bleu4_v.txt', self.bleu4_v)

    def log(self, log_str, file_name=None):
        print(log_str)
        log_to_file_in_dir(self.experiment_dir, 'all.log', log_str)
        if file_name is not None:
            log_to_file_in_dir(self.experiment_dir, file_name, log_str)

    def log_epoch_stats(self, start_time):
        time_elapsed = datetime.now() - start_time
        time_to_completion = time_elapsed * (self.epochs - self.current_epoch -
                                             1)
        train_loss = self.training_losses[self.current_epoch]
        val_loss = self.val_losses[self.current_epoch]
        summary_str = "Epoch: {}, Train Loss: {}, Val Loss: {}, Took {}, ETA: {}\n"
        summary_str = summary_str.format(self.current_epoch + 1, train_loss,
                                         val_loss, str(time_elapsed),
                                         str(time_to_completion))
        self.log(summary_str, 'epoch.log')