Ejemplo n.º 1
0
def fit_validate(exp_params, k, data_path, write_path, others=None, custom_tag=''):
    """Fit model and compute metrics on train and validation sets.

    Intended for hyperparameter search: only logs final metrics and a scatter
    plot of the final embedding to the Comet experiment.

    Args:
        exp_params(dict): Parameter dict. Should at least have keys model_name,
            dataset_name & random_state. Other keys are assumed to be model
            parameters.
        k(int): Fold identifier.
        data_path(str): Data directory.
        write_path(str): Where to write temp files.
        others(dict): Other things to log to Comet experiment.
        custom_tag(str): Custom tag for comet experiment.

    """
    # Comet experiment; disable_mp() turns off the comet monitoring subprocess.
    exp = Experiment(parse_args=False)
    exp.disable_mp()
    custom_tag += '_validate'
    exp.add_tag(custom_tag)
    exp.log_parameters(exp_params)

    if others is not None:
        exp.log_others(others)

    # Parse experiment parameters
    model_name, dataset_name, random_state, model_params = parse_params(exp_params)

    # Fetch dataset and carve a validation split out of the training data.
    data_train = getattr(grae.data, dataset_name)(split='train', random_state=random_state, data_path=data_path)
    data_train, data_val = data_train.validation_split(random_state=FOLD_SEEDS[k])

    # Model
    m = getattr(grae.models, model_name)(random_state=FOLD_SEEDS[k], **model_params)
    m.write_path = write_path
    m.data_val = data_val  # presumably used by the model for early stopping — TODO confirm

    with exp.train():
        m.fit(data_train)

        # Log plot
        m.comet_exp = exp
        m.plot(data_train, data_val, title=f'{model_name} : {dataset_name}')

        # Probe embedding. Only the metrics are needed here, so the returned
        # embeddings are discarded.
        prober = EmbeddingProber()
        prober.fit(model=m, dataset=data_train, mse_only=True)
        _, train_metrics = prober.score(data_train, is_train=True)

        # Log train metrics
        exp.log_metrics(train_metrics)

    with exp.validate():
        _, val_metrics = prober.score(data_val)

        # Log validation metrics
        exp.log_metrics(val_metrics)

    # Log marker to mark successful experiment
    exp.log_other('success', 1)
Ejemplo n.º 2
0
def setup_comet_ml():
    """Initialise and return a configured Comet ``Experiment``.

    All settings come from the module-level ``config`` object. The experiment
    is created disabled (no-op logging) when ``config.COMET_MONITOR`` is falsy.

    Returns:
        Experiment: named Comet experiment with run metadata already logged.
    """
    experiment = Experiment(
        api_key=config.COMET_API_KEY,
        # Disable all logging when monitoring is turned off in the config.
        disabled=not config.COMET_MONITOR,
        log_code=False,
        project_name=config.COMET_PROJECT_NAME,
        workspace=config.COMET_WORKSPACE,
    )

    experiment.set_name(config.EXPERIMENT_NAME)
    experiment.log_others({
        "conditioning": config.EXPERIMENT_Z,
        "dataset": config.EXPERIMENT_DATASET,
    })
    experiment.log_parameters({
        "batch_size": config.EXPERIMENT_BATCH_SIZE,
        "epochs": config.EXPERIMENT_EPOCHS,
    })

    return experiment
"""
# import comet_ml in the top of your file
from comet_ml import Experiment
import json
from pathlib import Path
from src.arg_parser import get_parser
# Add the following code anywhere in your machine learning file
experiment = Experiment(api_key="K96HV1ZN57Ip54lRy1GNaOpBN",
                        project_name="fraudulentuserdetection",
                        workspace="watarukudo0914")
# from src.models import sdgcn, sgcn, rev2, rgcn

parser = get_parser()
args = parser.parse_args()
experiment.log_others({
    'model_name': args.model_name,
    'data_name': args.data_name,
})
"""
model設定
"""
if args.model_name == 'sdgcn':
    from src.models import sdgcn
    model = sdgcn
elif args.model_name == 'sgcn':
    from src.models import sgcn
    model = sgcn
elif args.model_name == 'rgcn':
    from src.models import rgcn
    model = rgcn
elif args.model_name == 'rev2':
    from src.models import rev2
Ejemplo n.º 4
0
class Dashboard:
    """Record training/evaluation statistics to Comet.

    On a fresh run a new ``Experiment`` is created and its key is written to
    ``<log_dir>/exp_key``; with ``resume=True`` that key is read back and an
    ``ExistingExperiment`` is attached instead, so logging continues in the
    same Comet run.

    :param Path log_dir
    :param list taskid_to_name
    """
    def __init__(self, config, paras, log_dir, train_type, resume=False):
        self.log_dir = log_dir
        # File persisting the Comet experiment key across runs (for resume).
        self.expkey_f = Path(self.log_dir, 'exp_key')
        self.global_step = 1

        if resume:
            # Reattach to the experiment whose key was saved by a previous run.
            assert self.expkey_f.exists(
            ), f"Cannot find comet exp key in {self.log_dir}"
            with open(Path(self.log_dir, 'exp_key'), 'r') as f:
                exp_key = f.read().strip()
            self.exp = ExistingExperiment(
                previous_experiment=exp_key,
                project_name=COMET_PROJECT_NAME,
                workspace=COMET_WORKSPACE,
                auto_output_logging=None,
                auto_metric_logging=None,
                display_summary_level=0,
            )
        else:
            self.exp = Experiment(
                project_name=COMET_PROJECT_NAME,
                workspace=COMET_WORKSPACE,
                auto_output_logging=None,
                auto_metric_logging=None,
                display_summary_level=0,
            )
            # TODO: is there a better way to persist the experiment key?
            with open(self.expkey_f, 'w') as f:
                print(self.exp.get_key(), file=f)

            self.exp.log_other('seed', paras.seed)
            self.log_config(config)
            if train_type == 'evaluation':
                if paras.pretrain:
                    # Evaluation of a pretrained model: name/tag with both
                    # pretrain and eval suffixes.
                    self.exp.set_name(
                        f"{paras.pretrain_suffix}-{paras.eval_suffix}")
                    self.exp.add_tags([
                        paras.pretrain_suffix, config['solver']['setting'],
                        paras.accent, paras.algo, paras.eval_suffix
                    ])
                    # Either an explicit model path or the run identifiers
                    # needed to locate the pretrained model are logged.
                    if paras.pretrain_model_path:
                        self.exp.log_other("pretrain-model-path",
                                           paras.pretrain_model_path)
                    else:
                        self.exp.log_other("pretrain-runs",
                                           paras.pretrain_runs)
                        self.exp.log_other("pretrain-setting",
                                           paras.pretrain_setting)
                        self.exp.log_other("pretrain-tgt-accent",
                                           paras.pretrain_tgt_accent)
                else:
                    # Evaluation from scratch ("mono" = no pretraining).
                    self.exp.set_name(paras.eval_suffix)
                    self.exp.add_tags(
                        ["mono", config['solver']['setting'], paras.accent])
            else:
                # Pretraining run: log every source accent plus the target.
                self.exp.set_name(paras.pretrain_suffix)
                self.exp.log_others({
                    f"accent{i}": k
                    for i, k in enumerate(paras.pretrain_accents)
                })
                self.exp.log_other('accent', paras.tgt_accent)
                self.exp.add_tags([
                    paras.algo, config['solver']['setting'], paras.tgt_accent
                ])
            # TODO: Need to add pretrain setting

        ## slurm-related: hosts named "<name>.speech" are cluster nodes, so the
        ## SLURM job id is attached to the experiment for traceability.
        hostname = os.uname()[1]
        if len(hostname.split('.')) == 2 and hostname.split(
                '.')[1] == 'speech':
            logger.notice(f"Running on Battleship {hostname}")
            self.exp.log_other('jobid', int(os.getenv('SLURM_JOBID')))
        else:
            logger.notice(f"Running on {hostname}")

    def log_config(self, config):
        """Flatten and log a config dict as Comet parameters.

        Keys are prefixed ``<block>-<name>``.
        """
        # NOTE: config nesting is assumed to be at most 2 levels deep.
        for block in config:
            for n, p in config[block].items():
                if isinstance(p, dict):
                    self.exp.log_parameters(p, prefix=f'{block}-{n}')
                else:
                    self.exp.log_parameter(f'{block}-{n}', p)

    def set_status(self, status):
        """Record a free-form status string on the experiment."""
        self.exp.log_other('status', status)

    def step(self, n=1):
        """Advance the global step counter by ``n``."""
        self.global_step += n

    def set_step(self, global_step=1):
        """Reset the global step counter (e.g. when resuming)."""
        self.global_step = global_step

    def log_info(self, prefix, info):
        """Log a dict of numeric metrics at the current global step."""
        self.exp.log_metrics({k: float(v)
                              for k, v in info.items()},
                             prefix=prefix,
                             step=self.global_step)

    def log_other(self, name, value):
        # NOTE(review): despite the name, this logs a *metric* (stepped time
        # series), not an "other" — callers rely on that behavior.
        self.exp.log_metric(name, value, step=self.global_step)

    def log_step(self):
        """Persist the current global step so a resumed run can restore it."""
        self.exp.log_other('step', self.global_step)

    def add_figure(self, fig_name, data):
        """Attach a matplotlib-style figure at the current global step."""
        self.exp.log_figure(figure_name=fig_name,
                            figure=data,
                            step=self.global_step)

    def check(self):
        """Warn if the Comet experiment is no longer accepting data."""
        if not self.exp.alive:
            logger.warning("Comet logging stopped")
Ejemplo n.º 5
0
def fit_test(exp_params, data_path, k, write_path, others=None, custom_tag=''):
    """Fit model and compute metrics on both train and test sets.

    Also log plot and embeddings to comet.

    Args:
        exp_params(dict): Parameter dict. Should at least have keys model_name,
            dataset_name & random_state. Other keys are assumed to be model
            parameters.
        k(int): Fold identifier.
        data_path(str): Data directory.
        write_path(str): Where temp files can be written.
        others(dict): Other things to log to Comet experiment.
        custom_tag(str): Custom tag for Comet experiment.

    """
    # Increment fold to avoid reusing validation seeds
    k += 10

    # Comet experiment; disable_mp() turns off the comet monitoring subprocess.
    exp = Experiment(parse_args=False)
    exp.disable_mp()
    custom_tag += '_test'
    exp.add_tag(custom_tag)
    exp.log_parameters(exp_params)

    if others is not None:
        exp.log_others(others)

    # Parse experiment parameters
    model_name, dataset_name, random_state, model_params = parse_params(exp_params)

    # Fetch and split dataset.
    data_train_full = getattr(grae.data, dataset_name)(split='train', random_state=random_state, data_path=data_path)
    data_test = getattr(grae.data, dataset_name)(split='test', random_state=random_state, data_path=data_path)

    if model_name == 'PCA':
        # No validation split on PCA
        data_train, data_val = data_train_full, None
    else:
        data_train, data_val = data_train_full.validation_split(random_state=FOLD_SEEDS[k])

    # Model
    m = getattr(grae.models, model_name)(random_state=FOLD_SEEDS[k], **model_params)
    m.comet_exp = exp  # Used by DL models to log metrics between epochs
    m.write_path = write_path
    m.data_val = data_val  # For early stopping

    # Benchmark fit time
    fit_start = time.time()

    m.fit(data_train)

    fit_stop = time.time()

    fit_time = fit_stop - fit_start

    # Log plots. Image datasets get reconstruction views; surface datasets get
    # 3D reconstruction views.
    m.plot(data_train, data_test, title=f'{model_name}_{dataset_name}')
    if dataset_name in ['Faces', 'RotatedDigits', 'UMIST', 'Tracking', 'COIL100', 'Teapot']:
        m.view_img_rec(data_train, choice='random', title=f'{model_name}_{dataset_name}_train_rec')
        m.view_img_rec(data_test, choice='best', title=f'{model_name}_{dataset_name}_test_rec_best')
        m.view_img_rec(data_test, choice='worst', title=f'{model_name}_{dataset_name}_test_rec_worst')
    elif dataset_name in ['ToroidalHelices', 'Mammoth'] or 'SwissRoll' in dataset_name:
        m.view_surface_rec(data_train, title=f'{model_name}_{dataset_name}_train_rec', dataset_name=dataset_name)
        m.view_surface_rec(data_test, title=f'{model_name}_{dataset_name}_test_rec', dataset_name=dataset_name)

    # Score models. Note: probing is done on the *full* train split here,
    # unlike fit_validate which probes the reduced train split.
    prober = EmbeddingProber()
    prober.fit(model=m, dataset=data_train_full)

    with exp.train():
        train_z, train_metrics = prober.score(data_train_full)
        _, train_y = data_train_full.numpy()

        # Log train metrics
        exp.log_metric(name='fit_time', value=fit_time)
        exp.log_metrics(train_metrics)

    with exp.test():
        test_z, test_metrics = prober.score(data_test)
        _, test_y = data_test.numpy()

        # Log test metrics
        exp.log_metrics(test_metrics)

    # Log embedding as .npy file, then remove the temp file.
    file_name = os.path.join(write_path, f'emb_{model_name}_{dataset_name}.npy')
    save_dict(dict(train_z=train_z,
                   train_y=train_y,
                   test_z=test_z,
                   test_y=test_y,
                   random_state=random_state,
                   dataset_name=dataset_name,
                   model_name=model_name),
              file_name)
    # Context manager guarantees the handle is closed even if log_asset raises.
    with open(file_name, 'rb') as f:
        exp.log_asset(f, file_name=file_name)
    os.remove(file_name)

    # Log marker to mark successful experiment
    exp.log_other('success', 1)