Code Example #1
def generate_categories():
    # Capture the config path from the run arguments, then process the JSON configuration file
    try:
        args = get_args()
        config = process_config(args.config)
    except ValueError:
        print("Missing or invalid arguments")
        exit(0)

    print("Logging experiment name: {name}".format(
        name=config.experiment.experiment_name))
    experiment = Experiment(api_key=config.experiment.api_key,
                            project_name=config.experiment.project_name,
                            workspace=config.experiment.workspace)
    experiment.set_name(config.experiment.experiment_name)

    print('Creating the data loader...')
    data_loader = DataLoader(config.defects_summarizer.paths)
    train_data, test_data = data_loader.get_data()

    print('Creating the Preprocessor...')
    preprocessor = CorexPreprocessor(train_data, config)
    preprocessor.prepare_data()

    print('Loading and evaluating the Model...')
    model = CorexModel(config.defects_summarizer, preprocessor, seed=False)
    trainer = CorexTrainer(model, preprocessor.get_data())
    trainer.train()
    trainer.generate_topics()
    top_docs_df = trainer.get_top_documents(
        config.defects_summarizer.evaluate.extract_topics,
        preprocessor.get_raw_corpus(),
        config.defects_summarizer.evaluate.extraction_quantile,
        labels=True)
    top_docs_df.to_csv(config.defects_summarizer.paths.save_data_path)

    print('Saving the trained topic model...')
    model.save()

    print('Preprocessing the summarizer...')
    summary_preprocessor = TextRankPreprocessor(
        top_docs_df, n_docs=config.defects_summarizer.evaluate.n_docs)
    summary_preprocessor.prepare_data()

    print('Loading and evaluating the summarizer...')
    summary_model = TextRankModel(config)
    summary_trainer = TextRankTrainer(summary_model, summary_preprocessor)
    avg_prec, avg_recall, avg_f1 = summary_trainer.train_and_evaluate(
        test_data)

    # Log the rest of the experiment
    metrics = {"precision": avg_prec, "recall": avg_recall, "f1": avg_f1}
    experiment.log_metrics(metrics)

    experiment.log_model(
        name=config.experiment.model_name,
        file_or_folder=config.labels_generator.paths.save_model_path)
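
For reference, every example on this page centers on comet_ml's Experiment.log_model(name, file_or_folder), which uploads a saved model file (or folder) as an asset of the experiment. The sketch below shows the bare pattern in isolation; the API key, workspace, and file path are placeholders, not values from any of the projects above.

from comet_ml import Experiment
import torch

experiment = Experiment(api_key="YOUR_API_KEY",  # placeholder credentials
                        project_name="demo-project",
                        workspace="demo-workspace")

# Save the model to disk first; log_model then uploads the file as an asset.
model_path = "model.pt"
torch.save({"weights": []}, model_path)  # stand-in for a real state_dict
experiment.log_model("demo-model", model_path)  # asset name shown in the Comet UI
experiment.end()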
Code Example #2
def generate_topics():
    # Capture the config path from the run arguments, then process the JSON configuration file
    try:
        args = get_args()
        config = process_config(args.config)
    except ValueError:
        print("Missing or invalid arguments")
        exit(0)

    print("Logging experiment name: {name}".format(
        name=config.experiment.experiment_name))
    experiment = Experiment(api_key=config.experiment.api_key,
                            project_name=config.experiment.project_name,
                            workspace=config.experiment.workspace)
    experiment.set_name(config.experiment.experiment_name)
    params = config.labels_generator.model
    experiment.log_parameters(params)

    print('Creating the data loader...')
    data_loader = DataLoader(config.labels_generator.paths)
    data = data_loader.get_data()

    print('Creating the Preprocessor...')
    preprocessor = CorexPreprocessor(data, config)
    preprocessor.prepare_data()

    print('Creating and training the Model...')
    model = CorexModel(config, preprocessor)
    trainer = CorexTrainer(model, preprocessor.get_data())
    trainer.train()

    print('Evaluating the model...')
    coherence_lst, avg_coherence = trainer.evaluate(preprocessor.get_data(),
                                                    preprocessor.get_corpus())
    trainer.generate_topics()
    print("Coherence score: {score_lst} \nAvg coherence score: {avg_score}".
          format(score_lst=coherence_lst, avg_score=avg_coherence))

    print('Saving the trained model...')
    model.save()

    # Log the rest of the experiment
    metrics = {"coherence": avg_coherence}
    experiment.log_metrics(metrics)

    experiment.log_model(
        name=config.experiment.model_name,
        file_or_folder=config.labels_generator.paths.save_model_path)
Code Example #3
if args.Xlrnrate: args.lrnrate = .2
if args.Xoptimizer: args.optimizer = 'sgd'
if args.Xnet:
    args.net = 'ConvNet'
    args.lrnrate = .05
print(args)
if rank == 0:
    experiment = Experiment(project_name='metapoison-victim',
                            auto_param_logging=False,
                            auto_metric_logging=False)
    experiment.log_parameters(vars(args))
    experiment.log_parameter('nmeta', nmeta)
    experiment.set_name(args.key)
    experiment.add_tag(args.tag)

    experiment.log_model("CIFAR10", "../models/victim_model_1_run")
# args.gpu = set_available_gpus(args)
# Again, hard-code the GPU index to avoid the error
args.gpu = [1]
if args.name == '': args.name = args.net


def victim():
    def comet_pull_next_poison():
        # grab next poison from comet that hasn't been processed
        impatience = 0
        # while not has_exitflag(args.key, api) or impatience < 5:  # patience before ending victim process
        # Drop the has_exitflag condition and keep only the impatience condition
        while impatience < 5:  # patience before ending victim process
            sleep(1)
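
The truncated helper above polls Comet for poisons that have not been processed yet, giving up after a fixed patience. Below is a generic sketch of that polling pattern, under the assumption that has_new_work and fetch_work stand in for the Comet queries elided here.

from time import sleep

def poll_with_patience(max_patience=5):
    impatience = 0
    while impatience < max_patience:  # patience before ending the process
        sleep(1)
        if has_new_work():  # hypothetical check for a new unprocessed item
            impatience = 0  # reset the patience counter on success
            yield fetch_work()  # hypothetical download of the item
        else:
            impatience += 1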
Code Example #4
File: main.py Project: briemadu/inc-bidirectional
if args.comet_track:
    experiment.log_metric("best_valid_performance",
                          best_valid_performance)

print('Stopped at epoch {}.\n'.format(epoch+1))    
if args.comet_track:
    experiment.log_metric("last_epoch", epoch)    
elapsed_learn = time.time() - start_learn
print('Learning took {0[0]:.0f} min {0[1]:.0f} secs. \n'.format(
                                                    divmod(elapsed_learn, 60)))

# load best model during training
model.load_state_dict(torch.load('models/'+model_name+'_bestmodel.pt'))
model.eval()
if args.comet_track:
    experiment.log_model('best_model', 'models/'+model_name+'_bestmodel.pt')

# check how well it does on test set
with experiment.test():
    test_loss, preds, golds = test(test_loader, model, my_device, label_pad_id, 
                                    seq2seq, dataset='test   ')
    test_acc, test_f1 = evaluate_nn(golds, preds, id2label)
    if args.comet_track:
        experiment.log_metric("nl_loss", test_loss)
        experiment.log_metric("acc", test_acc)
        experiment.log_metric("f1", test_f1)

############################ EVALUATING INCREMENTALITY #######################

if not args.only_training:
    print('Incremental processing evaluation started.')
Code Example #5
File: train.py Project: matech96/pose_refinement
def run_experiment(output_path, _config, exp: Experiment):
    exp.log_parameters(flatten_params(_config))
    ensuredir(output_path)  # ensure the output directory exists before writing into it
    save(os.path.join(output_path, "config.json"), _config)

    if _config["train_data"] == "mpii_train":
        print("Training data is mpii-train")
        train_data = Mpi3dTrainDataset(
            _config["pose2d_type"],
            _config["pose3d_scaling"],
            _config["cap_25fps"],
            _config["stride"],
        )

    elif _config["train_data"] == "mpii+muco":
        print("Training data is mpii-train and muco_temp concatenated")
        mpi_data = Mpi3dTrainDataset(
            _config["pose2d_type"],
            _config["pose3d_scaling"],
            _config["cap_25fps"],
            _config["stride"],
        )

        muco_data = PersonStackedMucoTempDataset(_config["pose2d_type"],
                                                 _config["pose3d_scaling"])
        train_data = ConcatPoseDataset(mpi_data, muco_data)

    elif _config["train_data"].startswith("muco_temp"):
        train_data = PersonStackedMucoTempDataset(_config["pose2d_type"],
                                                  _config["pose3d_scaling"])

    test_data = Mpi3dTestDataset(_config["pose2d_type"],
                                 _config["pose3d_scaling"],
                                 eval_frames_only=True)

    if _config["simple_aug"]:
        train_data.augment(False)

    assert _config["orient_norm"] == "gauss"
    normalizer_orient = MeanNormalizeOrient(train_data)
    # Load the preprocessing steps
    train_data.transform = None
    transforms_train = [
        decode_trfrm(_config["preprocess_2d"], globals())(train_data,
                                                          cache=False),
        decode_trfrm(_config["preprocess_3d"], globals())(train_data,
                                                          cache=False),
        normalizer_orient,
    ]

    normalizer2d = transforms_train[0].normalizer
    normalizer3d = transforms_train[1].normalizer

    transforms_test = [
        decode_trfrm(_config["preprocess_2d"], globals())(test_data,
                                                          normalizer2d),
        decode_trfrm(_config["preprocess_3d"], globals())(test_data,
                                                          normalizer3d),
        normalizer_orient,
    ]

    transforms_train.append(RemoveIndex())
    transforms_test.append(RemoveIndex())

    train_data.transform = SaveableCompose(transforms_train)
    test_data.transform = SaveableCompose(transforms_test)

    # save normalisation params
    save(output_path + "/preprocess_params.pkl",
         train_data.transform.state_dict())

    len_train = len(train_data)
    len_test = len(test_data)
    print("Length of training data:", len_train)
    print("Length of test data:", len_test)
    exp.log_parameter("train data length", len_train)
    exp.log_parameter("test data length", len_test)

    bos = train_data[[0]]["orientation"].shape
    out_shape = (bos[1] * bos[2] if _config["model"]["loss"] == "orient" else
                 MuPoTSJoints.NUM_JOINTS * 3)
    model = TemporalModelOptimized1f(
        train_data[[0]]["pose2d"].shape[-1],
        out_shape,
        _config["model"]["filter_widths"],
        dropout=_config["model"]["dropout"],
        channels=_config["model"]["channels"],
        layernorm=_config["model"]["layernorm"],
    )
    test_model = TemporalModel(
        train_data[[0]]["pose2d"].shape[-1],
        out_shape,
        _config["model"]["filter_widths"],
        dropout=_config["model"]["dropout"],
        channels=_config["model"]["channels"],
        layernorm=_config["model"]["layernorm"],
    )

    model.cuda()
    test_model.cuda()

    save(output_path + "/model_summary.txt", str(model))

    pad = (model.receptive_field() - 1) // 2
    train_loader = ChunkedGenerator(
        train_data,
        _config["batch_size"],
        pad,
        _config["train_time_flip"],
        shuffle=_config["shuffle"],
        ordered_batch=_config["ordered_batch"],
    )
    tester = ModelCopyTemporalEvaluator(test_model,
                                        test_data,
                                        _config["model"]["loss"],
                                        _config["test_time_flip"],
                                        post_process3d=get_postprocessor(
                                            _config, test_data, normalizer3d),
                                        prefix="test",
                                        orient_norm=_config["orient_norm"],
                                        normalizer_orient=normalizer_orient)

    torch_train(
        exp,
        train_loader,
        model,
        lambda m, b: calc_loss(m, b, _config, None, None, None,
                               normalizer_orient),
        # lambda m, b: calc_loss(
        #     m,
        #     b,
        #     _config,
        #     torch.tensor(normalizer2d.mean[2::3]).cuda(),
        #     torch.tensor(normalizer2d.std[2::3]).cuda(),
        #     torch.tensor(normalizer3d.std).cuda(),
        # ),
        _config,
        callbacks=[tester],
    )

    model_path = os.path.join(output_path, "model_params.pkl")
    torch.save(model.state_dict(), model_path)
    exp.log_model("model", model_path)

    save(
        output_path + "/test_results.pkl",
        {
            "index": test_data.index,
            "pred": preds_from_logger(test_data, tester),
            "pose3d": test_data.poses3d,
        },
    )
Code Example #6
def run_experiment(output_path, _config, exp: Experiment):
    config, m = eval.load_model(_config["weights"])
    # config.update(_config)
    config["model"].update(_config["model"])
    _config["model"] = config["model"]

    # tmp = _config["model"]["loss"]
    # _config["model"]["loss"] = "v * mse + e_smooth_small"
    exp.log_parameters(train.flatten_params(_config))
    # _config["model"]["loss"] = tmp
    ensuredir(output_path)  # ensure the output directory exists before writing into it
    save(os.path.join(output_path, "config.json"), _config)

    if _config["train_data"] == "mpii_train":
        print("Training data is mpii-train")
        train_data = Mpi3dTrainDataset(
            _config["pose2d_type"],
            _config["pose3d_scaling"],
            _config["cap_25fps"],
            _config["stride"],
        )

    elif _config["train_data"] == "mpii+muco":
        print("Training data is mpii-train and muco_temp concatenated")
        mpi_data = Mpi3dTrainDataset(
            _config["pose2d_type"],
            _config["pose3d_scaling"],
            _config["cap_25fps"],
            _config["stride"],
        )

        muco_data = PersonStackedMucoTempDataset(_config["pose2d_type"],
                                                 _config["pose3d_scaling"])
        train_data = ConcatPoseDataset(mpi_data, muco_data)

    elif _config["train_data"].startswith("muco_temp"):
        train_data = PersonStackedMucoTempDataset(_config["pose2d_type"],
                                                  _config["pose3d_scaling"])

    test_data = Mpi3dTestDataset(_config["pose2d_type"],
                                 _config["pose3d_scaling"],
                                 eval_frames_only=True)

    if _config["simple_aug"]:
        train_data.augment(False)

    # Load the preprocessing steps
    params_path = os.path.join(LOG_PATH, _config["weights"],
                               "preprocess_params.pkl")
    transform = SaveableCompose.from_file(params_path, test_data, globals())

    train_data.transform = None
    transforms_train = [
        decode_trfrm(_config["preprocess_2d"], globals())(train_data,
                                                          cache=False),
        decode_trfrm(_config["preprocess_3d"], globals())(train_data,
                                                          cache=False),
    ]

    normalizer2d = transforms_train[0].normalizer
    normalizer3d = transforms_train[1].normalizer

    transforms_test = [
        decode_trfrm(_config["preprocess_2d"], globals())(test_data,
                                                          normalizer2d),
        decode_trfrm(_config["preprocess_3d"], globals())(test_data,
                                                          normalizer3d),
    ]

    transforms_train.append(RemoveIndex())
    transforms_test.append(RemoveIndex())

    train_data.transform = SaveableCompose(transforms_train)
    test_data.transform = SaveableCompose(transforms_test)
    # train_data.transform = SaveableCompose.from_file(params_path, train_data, globals())
    # test_data.transform = SaveableCompose.from_file(params_path, test_data, globals())

    # save normalisation params
    save(output_path + "/preprocess_params.pkl",
         train_data.transform.state_dict())

    len_train = len(train_data)
    len_test = len(test_data)
    print("Length of training data:", len_train)
    print("Length of test data:", len_test)
    exp.log_parameter("train data length", len_train)
    exp.log_parameter("test data length", len_test)

    model = TemporalModelOptimized1f(
        train_data[[0]]["pose2d"].shape[-1],
        MuPoTSJoints.NUM_JOINTS,
        config["model"]["filter_widths"],
        dropout=config["model"]["dropout"],
        channels=config["model"]["channels"],
        layernorm=config["model"]["layernorm"],
    )
    model.load_state_dict(m.state_dict())
    test_model = TemporalModel(
        train_data[[0]]["pose2d"].shape[-1],
        MuPoTSJoints.NUM_JOINTS,
        config["model"]["filter_widths"],
        dropout=config["model"]["dropout"],
        channels=config["model"]["channels"],
        layernorm=config["model"]["layernorm"],
    )

    model.cuda()
    test_model.cuda()

    save(output_path + "/model_summary.txt", str(model))

    # normalizer2d = train_data.transform.transforms[0].normalizer
    # normalizer3d = train_data.transform.transforms[1].normalizer

    pad = (model.receptive_field() - 1) // 2
    train_loader = ChunkedGenerator(
        train_data,
        _config["batch_size"],
        pad,
        _config["train_time_flip"],
        shuffle=_config["shuffle"],
        ordered_batch=_config["ordered_batch"],
    )
    tester = ModelCopyTemporalEvaluator(
        test_model,
        test_data,
        config["model"]["loss"],
        _config["test_time_flip"],
        post_process3d=get_postprocessor(_config, test_data, normalizer3d),
        prefix="test",
    )

    torch_train(
        exp,
        train_loader,
        model,
        lambda m, b: train.calc_loss(
            m,
            b,
            _config,
            torch.tensor(normalizer2d.mean[2::3]).cuda(),
            torch.tensor(normalizer2d.std[2::3]).cuda(),
            torch.tensor(normalizer3d.std).cuda(),
        ),
        _config,
        callbacks=[tester],
    )

    model_path = os.path.join(output_path, "model_params.pkl")
    torch.save(model.state_dict(), model_path)
    exp.log_model("model", model_path)
Code Example #7
File: dnai_logging.py Project: juanelenter/DNAi
class experiment_logger:
    '''
    Interface for logging experiments on neptune, comet, or both.
    Args: log_backend, project_name
    Other backends may also be added in the future.
    Currently defined methods:
        add_params:
        add_tags:
        log_text: strings
        log_metrics: numerical values
        log_figure: pyplot figures
        
        stop: end logging and close connection
    '''
    def __init__(self, log_backend, project_name):
        '''

        Parameters
        ----------
        log_backend : STR
            One of 'comet', 'neptune', 'all'
        project_name : STR
            one of the available projects ('yeast', 'jersey', 'wheat', 'debug', etc.)
            
        Returns
        -------
        None.

        '''
        self.proj_name = project_name
        self.backend = log_backend
        # Bool indicating whether neptune logging is enabled
        self.neptune = log_backend == 'neptune' or log_backend == 'all'
        # Bool indicating whether comet logging is enabled
        self.comet = log_backend == 'comet' or log_backend == 'all'
        if self.neptune:
            if fing:  # module-level flag (assumed defined elsewhere) enabling the FING proxy
                neptune.init(
                    "dna-i/" + project_name,
                    api_token=
                    'eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vdWkubmVwdHVuZS5haSIsImFwaV91cmwiOiJodHRwczovL3VpLm5lcHR1bmUuYWkiLCJhcGlfa2V5IjoiMWYzMzhjMjItYjczNC00NzZhLWFlZTYtOTI2NzE5MzUwZmNkIn0=',
                    proxies={
                        'http': "http://httpproxy.fing.edu.uy:3128/",
                        'https': "http://httpproxy.fing.edu.uy:3128/",
                    })
            else:
                neptune.init(
                    "dna-i/" + project_name,
                    api_token=
                    'eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vdWkubmVwdHVuZS5haSIsImFwaV91cmwiOiJodHRwczovL3VpLm5lcHR1bmUuYWkiLCJhcGlfa2V5IjoiMWYzMzhjMjItYjczNC00NzZhLWFlZTYtOTI2NzE5MzUwZmNkIn0='
                )

            print("logging experiments on neptune project " + project_name)
            neptune.create_experiment()
        if self.comet:
            self.comet_experiment = Experiment(
                api_key="V0OXnWOi4KVNS4OkwLjdnxSgK",
                project_name=project_name,
                workspace="dna-i")
            print("logging experiments on comet project " + project_name)
        if not (self.neptune or self.comet):
            raise ValueError('Logging Backend NOT Available')

    def add_params(self, params, step=None):
        '''
        Adds parameters to experiment log

        Parameters
        ----------
        params : Dict
            Key-Value pairs

        Returns
        -------
        None.

        '''
        if self.neptune:
            for key, value in params.items():
                neptune.set_property(key, value)
            if step is not None:
                neptune.set_property('step', step)
        if self.comet:
            self.comet_experiment.log_parameters(params, step=step)

    def add_params_torch(self, torch_model, step=None):
        '''
        Logs torch model parameter histogram per layer

        Parameters
        ----------
        torch_model : torch nn.Module

        Returns
        -------
        None.

        '''
        if self.neptune:
            raise NotImplementedError
        if self.comet:
            for name, param in torch_model.named_parameters():
                self.comet_experiment.log_histogram_3d(
                    param.detach().cpu().numpy().tolist(),
                    name=name,
                    step=step)

    def log_model_torch(self, model_name, torch_model, step=None):
        '''
        Logs torch model

        Parameters
        ----------
        torch_model : torch nn.Module

        Returns
        -------
        None.

        '''
        if self.neptune:
            raise NotImplementedError
        if self.comet:
            wpath = "./model.pt"
            torch.save(torch_model.state_dict(), wpath)
            self.comet_experiment.log_model(model_name, wpath)
            os.remove(wpath)

    def add_tags(self, tags):
        '''
        Adds tags to experiment log

        Parameters
        ----------
        tags : list of STR
            list of tags (strings)
            e.g.: ['tag1', 'tag2']

        Returns
        -------
        None.

        '''
        if self.neptune:
            neptune.append_tag(tags)
        if self.comet:
            self.comet_experiment.add_tags(tags)

    def log_metrics(self, name, value, epoch=None):
        '''
        Logging pointwise metrics

        Parameters
        ----------
        name : STR
            Metric key
        value : Float/Integer/(Boolean/String)
            Comet also allows Boolean/string
            Tuples are also allowed
        epoch: (OPT)  INT
            Epoch - or anything used as x axis when plotting metrics

        Returns
        -------
        None.

        '''
        if self.neptune:
            try:
                if epoch is not None:
                    if type(value) is tuple:
                        print("Logging tuple as r and p-value")
                        for val, n in zip(value, [" (r)", " (p-val)"]):
                            neptune.log_metric(name + n, epoch, y=val)
                    else:
                        neptune.log_metric(name, epoch, y=value)
                else:
                    if type(value) is tuple:
                        print("Logging tuple as r and p-value")
                        for val, n in zip(value, [" (r)", " (p-val)"]):
                            neptune.log_metric(name + n, val)
                    else:
                        neptune.log_metric(name, value)
            except Exception:
                print("Metric type {} not supported by neptune.".format(
                    type(value)))
                print("logging as text")
                self.log_text("{}".format(value), key=name)

        if self.comet:
            try:
                if epoch is not None:
                    if type(value) is tuple:
                        print("Logging tuple as r and p-value")
                        for val, n in zip(value, [" (r)", " (p-val)"]):
                            self.comet_experiment.log_metric(name + n,
                                                             val,
                                                             step=int(epoch))
                    else:
                        self.comet_experiment.log_metric(name,
                                                         value,
                                                         epoch=epoch)
                else:
                    if type(value) is tuple:
                        print("Logging tuple as r and p-value")
                        for val, n in zip(value, [" (r)", " (p-val)"]):
                            self.comet_experiment.log_metric(name + n, val)
                    else:
                        self.comet_experiment.log_metric(name, value)
            except Exception:
                print("Metric type {} not supported by comet.".format(
                    type(value)))
                if type(value) is tuple:
                    print("Logging tuple as x-y pairs")
                    for idx, val in enumerate(value):
                        self.comet_experiment.log_metric(name, val, epoch=idx)
                else:
                    print("Logging as other.")
                    self.comet_experiment.log_other(name, value)

    def log_text(self, string, key=None, epoch=None):
        '''
          Logs text strings

          Parameters
          ----------
          string : STR
              text to  log
          key: STR
              log_name needed for Neptune strings 
          epoch: INT
              epoch or any other index
          
          Returns
          -------
          None.

        '''
        if self.neptune:
            if type(string) is str:
                if key is None:
                    print('Neptune log_name needed for logging text')
                    print('Using a dummy name: text')
                    key = 'text'
                if epoch is None:
                    neptune.log_text(key, string)
                else:
                    neptune.log_text(key, epoch, y=string)
            else:
                print("Wrong type: logging text must be a string")
        if self.comet:
            if type(string) is str:
                if key is not None:
                    print(
                        "Comet text logging does not support keys; prepending the key to the text"
                    )
                    string = key + ', ' + string
                if epoch is None:
                    self.comet_experiment.log_text(string)
                else:
                    self.comet_experiment.log_text(string, step=epoch)
            else:
                print("Wrong type: logging text must be a string")

    def log_figure(self, figure=None, figure_name=None, step=None):
        '''
        Logs pyplot figure

        Parameters
        ----------
        figure : pyplot figure, optional in comet mandatory in neptune.
            The default is None, uses global pyplot figure.
        figure_name : STR, optional in comet mandatory in neptune.
             The default is None.
        step : INT, optional
            An index. The default is None.

        Returns
        -------
        None.

        '''
        if self.neptune:
            if figure is not None:
                if figure_name is None:
                    print("Figure name must be given to neptune logger")
                    print("Using dummy name: figure")
                    figure_name = 'figure'
                if step is None:
                    neptune.log_image(figure_name, figure)
                else:
                    neptune.log_image(figure_name, step, y=figure)
            else:
                print("A figure must be passed to neptune logger")
        if self.comet:
            self.comet_experiment.log_figure(figure_name=figure_name,
                                             figure=figure,
                                             step=step)

    def stop(self):
        if self.neptune:
            neptune.stop()
        if self.comet:
            self.comet_experiment.end()

    def add_table(self, filename, tabular_data=None, headers=False):
        # Comet-only: log tabular data as an asset
        if self.comet:
            self.comet_experiment.log_table(filename, tabular_data, headers)

    def log_image(self, image=None, figure_name=None, step=None):
        '''
        Logs an image (comet only)

        Parameters
        ----------
        image : image data (e.g. array or PIL image), optional
            The default is None.
        figure_name : STR, optional
             The default is None.
        step : INT, optional
            An index. The default is None.

        Returns
        -------
        None.

        '''
        if self.comet:
            self.comet_experiment.log_image(image, name=figure_name,
                                            overwrite=False, image_format="png",
                                            image_scale=1.0, image_shape=None,
                                            image_colormap=None, image_minmax=None,
                                            image_channels="last",
                                            copy_to_tmp=True, step=step)

    def log_hist3d(self, values=None, figure_name=None, step=None):
        '''
        Logs a 3D histogram of values (comet only)

        Parameters
        ----------
        values : list or array
            Values to histogram. The default is None.
        figure_name : STR, optional
             The default is None.
        step : INT, optional
            An index. The default is None.
    
        Returns
        -------
        None.
    
        '''
        if self.neptune:
            print("not implemented")
        if self.comet:
            self.comet_experiment.log_histogram_3d(values,
                                                   name=figure_name,
                                                   step=step)

    def log_table(self, name=None, data=None, headers=False):
        '''
        Logs tabular data as a CSV asset (comet only)

        Parameters
        ----------
        name : str
            Table name
        data : array, list
            
        headers : BOOL, optional
            whether to use headers

        Returns
        -------
        None.

        '''
        if self.comet:
            self.comet_experiment.log_table(name + '.csv',
                                            tabular_data=data,
                                            headers=headers)
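
A hypothetical usage sketch of the experiment_logger class above; the backend and project name are placeholders, and 'comet' is chosen so the neptune proxy branch is never exercised.

logger = experiment_logger('comet', 'debug')  # comet-only backend
logger.add_params({'lr': 0.001, 'batch_size': 32})
logger.add_tags(['baseline', 'debug'])
logger.log_metrics('val_r', (0.42, 0.003), epoch=1)  # tuple logged as r and p-value
logger.log_text('run finished', key='status')
logger.stop()  # ends the comet experiment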
Code Example #8
class eICU_Operator(TrainingOperator):
    def setup(self, config):
        # Number of RaySGD workers
        self.num_workers = config.get('num_workers', 1)
        # Fetch the Comet ML credentials
        self.comet_ml_api_key = config['comet_ml_api_key']
        self.comet_ml_project_name = config['comet_ml_project_name']
        self.comet_ml_workspace = config['comet_ml_workspace']
        self.log_comet_ml = config.get('log_comet_ml', True)
        self.comet_ml_save_model = config.get('comet_ml_save_model', True)
        # Fetch model and dataset parameters
        self.model_class = config.get('model', 'VanillaRNN')  # Model class
        self.dataset_mode = config.get(
            'dataset_mode', 'one hot encoded'
        )  # The mode in which we'll use the data, either one hot encoded or pre-embedded
        self.ml_core = config.get(
            'ml_core', 'deep learning'
        )  # The core machine learning type we'll use; either traditional ML or DL
        self.use_delta_ts = config.get(
            'use_delta_ts',
            False)  # Indicates if we'll use time variation info
        self.time_window_h = config.get(
            'time_window_h',
            48)  # Number of hours on which we want to predict mortality
        # Additional properties and relevant training information
        self.step = 0  # Number of iteration steps done so far
        self.print_every = config.get(
            'print_every', 10)  # Steps interval where the metrics are printed
        self.val_loss_min = np.inf  # Start with an infinitely big minimum validation loss
        self.clip_value = config.get(
            'clip_value',
            0.5)  # Gradient clipping value, to avoid exploding gradients
        self.features_list = config.get(
            'features_list',
            None)  # Names of the features being used in the current pipeline
        self.model_type = config.get(
            'model_type', 'multivariate_rnn')  # Type of model to train
        self.padding_value = config.get(
            'padding_value',
            999999)  # Value to use in the padding, to fill the sequences
        self.cols_to_remove = config.get(
            'cols_to_remove', [0, 1]
        )  # List of indices of columns to remove from the features before feeding to the model
        self.is_custom = config.get(
            'is_custom',
            False)  # Specifies if the model being used is a custom built one
        self.already_embedded = config.get(
            'already_embedded', False
        )  # Indicates if the categorical features are already embedded when fetching a batch
        self.batch_size = config.get(
            'batch_size', 32
        )  # The number of samples used in each training, validation or test iteration
        self.n_epochs = config.get(
            'n_epochs', 1
        )  # Number of epochs, i.e. the number of times to iterate through all of the training data
        self.lr = config.get('lr', 0.001)  # Learning rate
        self.models_path = config.get(
            'models_path',
            '')  # Path to the directory where the models are stored
        self.see_progress = config.get(
            'see_progress', True
        )  # Sets if a progress bar is shown for each training and validation loop
        # Register all the hyperparameters
        if self.num_workers == 1:
            model = self.model
        else:
            # Get the original model, as the current one is wrapped in DistributedDataParallel
            model = self.model.module
        model_args = inspect.getfullargspec(model.__init__).args[1:]
        self.hyper_params = dict([(param, getattr(model, param))
                                  for param in model_args])
        self.hyper_params.update({
            'batch_size': self.batch_size,
            'n_epochs': self.n_epochs,
            'learning_rate': self.lr
        })
        if self.log_comet_ml is True:
            # Create a new Comet.ml experiment
            self.experiment = Experiment(
                api_key=self.comet_ml_api_key,
                project_name=self.comet_ml_project_name,
                workspace=self.comet_ml_workspace,
                auto_param_logging=False,
                auto_metric_logging=False,
                auto_output_logging=False)
            self.experiment.log_other('completed', False)
            self.experiment.log_other('random_seed', du.random_seed)
            # Report hyperparameters to Comet.ml
            self.experiment.log_parameters(self.hyper_params)
            self.experiment.log_parameters(config)
            if self.features_list is not None:
                # Log the names of the features being used
                self.experiment.log_other('features_list', self.features_list)
        if self.clip_value is not None:
            # Set gradient clipping to avoid exploding gradients
            for p in self.model.parameters():
                p.register_hook(lambda grad: torch.clamp(
                    grad, -self.clip_value, self.clip_value))

    def set_model_filename(self, val_loss):
        # Start with the model class name
        if self.model_class == 'VanillaRNN':
            model_filename = 'rnn'
        elif self.model_class == 'VanillaLSTM':
            model_filename = 'lstm'
        elif self.model_class == 'TLSTM':
            model_filename = 'tlstm'
        elif self.model_class == 'MF1LSTM':
            model_filename = 'mf1lstm'
        elif self.model_class == 'MF2LSTM':
            model_filename = 'mf2lstm'
        else:
            raise Exception(
                f'ERROR: {self.model_class} is an invalid model type. Please specify either "VanillaRNN", "VanillaLSTM", "TLSTM", "MF1LSTM" or "MF2LSTM".'
            )
        # Add dataset mode information
        if self.dataset_mode == 'pre-embedded':
            model_filename = model_filename + '_pre_embedded'
        elif self.dataset_mode == 'learn embedding':
            model_filename = model_filename + '_with_embedding'
        elif self.dataset_mode == 'one hot encoded':
            model_filename = model_filename + '_one_hot_encoded'
        # Use of time variation information
        if self.use_delta_ts is not False and (self.model_class == 'VanillaRNN'
                                               or self.model_class
                                               == 'VanillaLSTM'):
            model_filename = model_filename + '_delta_ts'
        # Add the validation loss and timestamp
        current_datetime = datetime.now().strftime('%d_%m_%Y_%H_%M')
        model_filename = f'{val_loss:.4f}_valloss_{model_filename}_{current_datetime}.pth'
        return model_filename

    @override(TrainingOperator)
    def validate(self, val_iterator, info):
        # Number of iteration steps done so far
        step = info.get('step', 0)
        # Initialize the validation metrics
        val_loss = 0
        val_acc = 0
        val_auc = list()
        if self.num_workers == 1:
            model = self.model
        else:
            # Get the original model, as the current one is wrapped in DistributedDataParallel
            model = self.model.module
        if model.n_outputs > 1:
            val_auc_wgt = list()
        # Loop through the validation data
        for features, labels in du.utils.iterations_loop(
                val_iterator, see_progress=self.see_progress,
                desc='Val batches'):
            # Turn off gradients for validation, saves memory and computations
            with torch.no_grad():
                if self.is_custom is False:
                    # Find the original sequence lengths
                    seq_lengths = du.search_explore.find_seq_len(
                        labels, padding_value=self.padding_value)
                else:
                    # No need to find the sequence lengths now
                    seq_lengths = None
                if self.use_gpu is True:
                    # Move data to GPU
                    features, labels = features.to(self.device), labels.to(
                        self.device)
                # Do inference on the data
                if self.model_type.lower() == 'multivariate_rnn':
                    (pred, correct_pred, scores, labels,
                     loss) = (du.deep_learning.inference_iter_multi_var_rnn(
                         self.model,
                         features,
                         labels,
                         padding_value=self.padding_value,
                         cols_to_remove=self.cols_to_remove,
                         is_train=False,
                         prob_output=True,
                         is_custom=self.is_custom,
                         already_embedded=self.already_embedded,
                         seq_lengths=seq_lengths,
                         distributed_train=(self.num_workers > 1)))
                elif self.model_type.lower() == 'mlp':
                    pred, correct_pred, scores, loss = (
                        du.deep_learning.inference_iter_mlp(
                            self.model,
                            features,
                            labels,
                            self.cols_to_remove,
                            is_train=False,
                            prob_output=True))
                else:
                    raise Exception(
                        f'ERROR: Invalid model type. It must be "multivariate_rnn" or "mlp", not {self.model_type}.'
                    )
                val_loss += loss  # Add the validation loss of the current batch
                val_acc += torch.mean(
                    correct_pred.type(torch.FloatTensor)
                )  # Add the validation accuracy of the current batch, ignoring all padding values
                if self.use_gpu is True:
                    # Move data to CPU for performance computations
                    scores, labels = scores.cpu(), labels.cpu()
                # Add the training ROC AUC of the current batch
                if model.n_outputs == 1:
                    try:
                        val_auc.append(
                            roc_auc_score(labels.numpy(),
                                          scores.detach().numpy()))
                    except Exception as e:
                        warnings.warn(
                            f'Couldn\'t calculate the validation AUC on step {step}. Received exception "{str(e)}".'
                        )
                else:
                    # It might happen that not all labels are present in the current batch;
                    # as such, we must focus on the ones that appear in the batch
                    labels_in_batch = labels.unique().long()
                    try:
                        val_auc.append(
                            roc_auc_score(labels.numpy(),
                                          softmax(scores[:, labels_in_batch],
                                                  dim=1).detach().numpy(),
                                          multi_class='ovr',
                                          average='macro',
                                          labels=labels_in_batch.numpy()))
                        # Also calculate a weighted version of the AUC; important for imbalanced dataset
                        val_auc_wgt.append(
                            roc_auc_score(labels.numpy(),
                                          softmax(scores[:, labels_in_batch],
                                                  dim=1).detach().numpy(),
                                          multi_class='ovr',
                                          average='weighted',
                                          labels=labels_in_batch.numpy()))
                    except Exception as e:
                        warnings.warn(
                            f'Couldn\'t calculate the validation AUC on step {step}. Received exception "{str(e)}".'
                        )
                # Remove the current features and labels from memory
                del features
                del labels
        # Calculate the average of the metrics over the batches
        val_loss = val_loss / len(val_iterator)
        val_acc = val_acc / len(val_iterator)
        val_auc = np.mean(val_auc)
        if model.n_outputs > 1:
            val_auc_wgt = np.mean(val_auc_wgt)
        # Return the validation metrics
        metrics = dict(val_loss=val_loss, val_acc=val_acc, val_auc=val_auc)
        if model.n_outputs > 1:
            metrics['val_auc_wgt'] = val_auc_wgt
        return metrics

    @override(TrainingOperator)
    def train_epoch(self, iterator, info):
        if self.num_workers == 1:
            model = self.model
        else:
            # Get the original model, as the current one is wrapped in DistributedDataParallel
            model = self.model.module
        print(f'DEBUG: TrainingOperator attributes:\n{vars(self)}')
        print(f'DEBUG: Model\'s attributes:\n{vars(model)}')
        # Register the current epoch
        epoch = info.get('epoch_idx', 0)
        # Number of iteration steps done so far
        step = info.get('step', 0)
        # Initialize the training metrics
        train_loss = 0
        train_acc = 0
        train_auc = list()
        if model.n_outputs > 1:
            train_auc_wgt = list()
        # try:
        # Loop through the training data
        for features, labels in du.utils.iterations_loop(
                iterator, see_progress=self.see_progress, desc='Steps'):
            # Activate dropout to train the model
            self.model.train()
            # Clear the gradients of all optimized variables
            self.optimizer.zero_grad()
            if self.is_custom is False:
                # Find the original sequence lengths
                seq_lengths = du.search_explore.find_seq_len(
                    labels, padding_value=self.padding_value)
            else:
                # No need to find the sequence lengths now
                seq_lengths = None
            if self.use_gpu is True:
                # Move data to GPU
                features, labels = features.to(self.device), labels.to(
                    self.device)
            # Do inference on the data
            if self.model_type.lower() == 'multivariate_rnn':
                (pred, correct_pred, scores, labels, step_train_loss) = (
                    du.deep_learning.inference_iter_multi_var_rnn(
                        self.model,
                        features,
                        labels,
                        padding_value=self.padding_value,
                        cols_to_remove=self.cols_to_remove,
                        is_train=True,
                        prob_output=True,
                        optimizer=self.optimizer,
                        is_custom=self.is_custom,
                        already_embedded=self.already_embedded,
                        seq_lengths=seq_lengths,
                        distributed_train=(self.num_workers > 1)))
            elif self.model_type.lower() == 'mlp':
                (pred, correct_pred, scores,
                 step_train_loss) = du.deep_learning.inference_iter_mlp(
                     self.model,
                     features,
                     labels,
                     self.cols_to_remove,
                     is_train=True,
                     prob_output=True,
                     optimizer=self.optimizer)
            else:
                raise Exception(
                    f'ERROR: Invalid model type. It must be "multivariate_rnn" or "mlp", not {self.model_type}.'
                )
            # Add the training loss and accuracy of the current batch
            train_loss += step_train_loss
            step_train_acc = torch.mean(correct_pred.type(torch.FloatTensor))
            train_acc += step_train_acc
            if self.use_gpu is True:
                # Move data to CPU for performance computations
                scores, labels = scores.cpu(), labels.cpu()
            # Add the training ROC AUC of the current batch
            if model.n_outputs == 1:
                try:
                    step_train_auc = roc_auc_score(labels.numpy(),
                                                   scores.detach().numpy())
                    train_auc.append(step_train_auc)
                except Exception as e:
                    warnings.warn(
                        f'Couldn\'t calculate the training AUC on step {step}. Received exception "{str(e)}".'
                    )
                    step_train_auc = None
            else:
                # It might happen that not all labels are present in the current batch;
                # as such, we must focus on the ones that appear in the batch
                labels_in_batch = labels.unique().long()
                try:
                    step_train_auc = roc_auc_score(
                        labels.numpy(),
                        softmax(scores[:, labels_in_batch],
                                dim=1).detach().numpy(),
                        multi_class='ovr',
                        average='macro',
                        labels=labels_in_batch.numpy())
                    train_auc.append(step_train_auc)
                    # Also calculate a weighted version of the AUC; important for imbalanced dataset
                    step_train_auc_wgt = roc_auc_score(
                        labels.numpy(),
                        softmax(scores[:, labels_in_batch],
                                dim=1).detach().numpy(),
                        multi_class='ovr',
                        average='weighted',
                        labels=labels_in_batch.numpy())
                    train_auc_wgt.append(step_train_auc_wgt)
                except Exception as e:
                    warnings.warn(
                        f'Couldn\'t calculate the training AUC on step {step}. Received exception "{str(e)}".'
                    )
                    step_train_auc = None
                    step_train_auc_wgt = None
            # Count one more iteration step
            step += 1
            info['step'] = step
            # Deactivate dropout to test the model
            self.model.eval()
            # Remove the current features and labels from memory
            del features
            del labels
            # Run the current model on the validation set
            val_metrics = self.validate(self.validation_loader, info)
            if self.log_comet_ml is True:
                # Upload the current step's metrics to Comet ML
                self.experiment.log_metric('train_loss',
                                           step_train_loss,
                                           step=step)
                self.experiment.log_metric('train_acc',
                                           step_train_acc,
                                           step=step)
                self.experiment.log_metric('train_auc',
                                           step_train_auc,
                                           step=step)
                self.experiment.log_metric('val_loss',
                                           val_metrics['val_loss'],
                                           step=step)
                self.experiment.log_metric('val_acc',
                                           val_metrics['val_acc'],
                                           step=step)
                self.experiment.log_metric('val_auc',
                                           val_metrics['val_auc'],
                                           step=step)
                if model.n_outputs > 1:
                    self.experiment.log_metric('train_auc_wgt',
                                               step_train_auc_wgt,
                                               step=step)
                    self.experiment.log_metric('val_auc_wgt',
                                               val_metrics['val_auc_wgt'],
                                               step=step)
            # Display validation loss
            if step % self.print_every == 0:
                print(
                    f'Epoch {epoch} step {step}: Validation loss: {val_metrics["val_loss"]}; Validation Accuracy: {val_metrics["val_acc"]}; Validation AUC: {val_metrics["val_auc"]}'
                )
            # Check if the performance obtained in the validation set is the best so far (lowest loss value)
            if val_metrics['val_loss'] < self.val_loss_min:
                print(
                    f'New minimum validation loss: {self.val_loss_min} -> {val_metrics["val_loss"]}.'
                )
                # Update the minimum validation loss
                self.val_loss_min = val_metrics['val_loss']
                # Filename and path where the model will be saved
                model_filename = self.set_model_filename(
                    val_metrics['val_loss'])
                print(f'Saving model in {model_filename}')
                # Save the best performing model so far, along with additional information to implement it
                checkpoint = self.hyper_params
                checkpoint['state_dict'] = self.model.state_dict()
                torch.save(checkpoint, model_filename)
                # [TODO] Check if this really works locally or if it just saves in the temporary nodes
                # self.save(checkpoint, f'{self.models_path}{model_filename}')
                if self.log_comet_ml is True and self.comet_ml_save_model is True:
                    # Upload the model to Comet.ml
                    self.experiment.log_model(name=model_filename,
                                              file_or_folder=model_filename,
                                              overwrite=True)
        # except Exception as e:
        #     warnings.warn(f'There was a problem doing training epoch {epoch}. Ending current epoch. Original exception message: "{str(e)}"')
        # try:
        # Calculate the average of the metrics over the epoch
        train_loss = train_loss / len(iterator)
        train_acc = train_acc / len(iterator)
        train_auc = np.mean(train_auc)
        if model.n_outputs > 1:
            train_auc_wgt = np.mean(train_auc_wgt)
        # Remove attached gradients so as to be able to print the values
        train_loss, val_loss = train_loss.detach(
        ), val_metrics['val_loss'].detach()
        if self.use_gpu is True:
            # Move metrics data to CPU
            train_loss, val_loss = train_loss.cpu(), val_loss.cpu()
        if self.log_comet_ml is True:
            # Upload the current epoch's metrics to Comet ML
            self.experiment.log_metric('train_loss', train_loss, epoch=epoch)
            self.experiment.log_metric('train_acc', train_acc, epoch=epoch)
            self.experiment.log_metric('train_auc', train_auc, epoch=epoch)
            self.experiment.log_metric('val_loss', val_loss, epoch=epoch)
            self.experiment.log_metric('val_acc',
                                       val_metrics['val_acc'],
                                       epoch=epoch)
            self.experiment.log_metric('val_auc',
                                       val_metrics['val_auc'],
                                       epoch=epoch)
            self.experiment.log_epoch_end(epoch, step=step)
            if model.n_outputs > 1:
                self.experiment.log_metric('train_auc_wgt',
                                           train_auc_wgt,
                                           epoch=epoch)
                self.experiment.log_metric('val_auc_wgt',
                                           val_metrics['val_auc_wgt'],
                                           epoch=epoch)
        # Print a report of the epoch
        print(
            f'Epoch {epoch}: Training loss: {train_loss}; Training Accuracy: {train_acc}; Training AUC: {train_auc}; \
                Validation loss: {val_loss}; Validation Accuracy: {val_metrics["val_acc"]}; Validation AUC: {val_metrics["val_auc"]}'
        )
        print('----------------------')
        # except Exception as e:
        #     warnings.warn(f'There was a problem printing metrics from epoch {epoch}. Original exception message: "{str(e)}"')
        # Return the training metrics
        metrics = dict(train_loss=train_loss,
                       train_acc=train_acc,
                       train_auc=train_auc)
        if model.n_outputs > 1:
            metrics['train_auc_wgt'] = train_auc_wgt
        return metrics
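
For completeness, a minimal, hypothetical config for the operator above, showing the keys that setup() reads; the credentials are placeholders and any omitted key falls back to the defaults in the code.

config = {
    'comet_ml_api_key': 'YOUR_API_KEY',  # placeholder Comet credentials
    'comet_ml_project_name': 'eicu-mortality',
    'comet_ml_workspace': 'YOUR_WORKSPACE',
    'log_comet_ml': True,  # enable Comet logging
    'comet_ml_save_model': True,  # upload the best checkpoints
    'model': 'VanillaLSTM',  # one of the classes set_model_filename accepts
    'model_type': 'multivariate_rnn',
    'batch_size': 32,
    'n_epochs': 1,
    'lr': 0.001,
}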