Ejemplo n.º 1
0
    def train(self):
        """Train the model and export all resulting artifacts.

        If comet.ml credentials are fully configured, the run is tracked in a
        comet.ml experiment; if ``use_comet`` is set but credentials are
        incomplete, an error is raised. Otherwise training runs locally with
        TensorBoard/checkpoint callbacks.

        Raises:
            Exception: if ``use_comet`` is set without api_key, project_name
                and workspace.
        """
        # Artifact directory for this run; fails if it already exists.
        os.mkdir(self.paths['path'])
        if self.use_comet and self.api_key and self.project_name and self.workspace:
            experiment = Experiment(api_key=self.api_key,
                                    project_name=self.project_name,
                                    workspace=self.workspace)
            # Hash lets comet.ml detect dataset changes across runs.
            experiment.log_dataset_hash(self.train_dataset)
            experiment.add_tags([
                str(self.architecture), "text_generation",
                f"nb_labels_{self.number_labels}"
            ])
            # NOTE(review): this branch calls self.fit_dataset and passes no
            # callbacks, while the non-comet branch calls
            # self.model.fit_dataset with callbacks — confirm the asymmetry
            # is intentional.
            with experiment.train():
                hist = self.fit_dataset(self.train_dataset, self.val_dataset,
                                        self.epochs)
            experiment.end()
        elif self.use_comet:
            raise Exception(
                "Please provide an api_key, project_name and workspace for comet_ml"
            )
        else:
            callbacks = self.callback_func(
                tensorboard_dir=self.paths['tensorboard_path'],
                checkpoint_path=self.paths['checkpoint_path'])
            hist = self.model.fit_dataset(self.train_dataset, self.val_dataset,
                                          self.epochs, callbacks)

        # Persist metrics, weights, metadata and tokenizer for later serving.
        self.metrics = get_metrics(hist, "sparse_categorical_accuracy")
        self.export_weights(self.model)
        self.export_info(self.model_info)
        self.export_metrics(self.metrics)
        self.export_tokenizer(self.tokenizer)
        if self.do_zip_model:
            self.zip_model()
Ejemplo n.º 2
0
    def init_callbacks(self):
        """Populate ``self.callbacks`` from the configuration.

        Always registers a ModelCheckpoint and a TensorBoard callback; when
        the config carries a ``comet_api_key``, a comet.ml experiment is set
        up as well and its Keras callback is appended last.
        """
        cb_conf = self.config.callbacks

        # Checkpoint files are named "<exp>-<epoch>-<val_loss>.hdf5".
        checkpoint_template = os.path.join(
            cb_conf.checkpoint_dir,
            '%s-{epoch:02d}-{val_loss:.2f}.hdf5' % self.config.exp.name)
        self.callbacks.append(
            ModelCheckpoint(
                filepath=checkpoint_template,
                monitor=cb_conf.checkpoint_monitor,
                mode=cb_conf.checkpoint_mode,
                save_best_only=cb_conf.checkpoint_save_best_only,
                save_weights_only=cb_conf.checkpoint_save_weights_only,
                verbose=cb_conf.checkpoint_verbose,
            ))

        self.callbacks.append(
            TensorBoard(
                log_dir=cb_conf.tensorboard_log_dir,
                write_graph=cb_conf.tensorboard_write_graph,
            ))

        if hasattr(self.config, "comet_api_key"):
            # Imported lazily so comet_ml is only required when configured.
            from comet_ml import Experiment
            experiment = Experiment(api_key=self.config.comet_api_key,
                                    project_name=self.config.exp_name)
            experiment.add_tags(self.config.tags)
            experiment.disable_mp()
            experiment.log_parameters(self.config)
            self.callbacks.append(experiment.get_keras_callback())
Ejemplo n.º 3
0
class CometTracker:
    """Thin wrapper around a comet.ml experiment (new or resumed)."""

    def __init__(self, comet_params, run_params=None, prev_exp_id=None):
        """Create or resume a comet.ml experiment.

        Args:
            comet_params: kwargs forwarded to Experiment/ExistingExperiment;
                must contain 'api_key' when resuming.
            run_params: optional dict of hyperparameters to log (new runs only).
            prev_exp_id: if given, resume that experiment instead of creating
                a new one.
        """
        if prev_exp_id:  # previous experiment
            # Work on a copy so the caller's dict is not mutated (the
            # original code deleted 'api_key' from the caller's dict).
            params = dict(comet_params)
            api_key = params.pop('api_key')
            self.experiment = ExistingExperiment(
                api_key=api_key,
                previous_experiment=prev_exp_id,
                **params)
            print(
                f'In CometTracker: ExistingExperiment initialized with id: {prev_exp_id}'
            )

        else:  # new experiment
            self.experiment = Experiment(**comet_params)
            self.experiment.log_parameters(run_params)

    def track_metric(self, metric, value, step):
        """Log a single metric value at the given step."""
        self.experiment.log_metric(metric, value, step)

    def add_tags(self, tags):
        """Attach a list of tags to the experiment."""
        self.experiment.add_tags(tags)
        print(f'In [add_tags]: Added these tags to the new experiment: {tags}')

    def set_name(self, name):
        """Set the experiment's display name."""
        self.experiment.set_name(name)
Ejemplo n.º 4
0
def main(model, optimizer, logger, optimized_function, project_name,
         work_space, tags, model_config_file, optimizer_config_file, epochs,
         n_samples, step_data_gen, n_samples_per_dim, reuse_optimizer,
         reuse_model, shift_model, finetune_model, init_psi):
    """End-to-end surrogate-model optimization run tracked on comet.ml.

    ``model``, ``optimizer``, ``logger`` and ``optimized_function`` are class
    names resolved with ``str_to_class``; the ``*_config_file`` arguments are
    module names imported for their ``model_config`` / ``optimizer_config``
    dicts. ``init_psi`` is a comma-separated string of floats, e.g. "0.1,0.2".
    """
    # The config modules must be importable from the working directory.
    model_config = getattr(__import__(model_config_file), 'model_config')
    optimizer_config = getattr(__import__(optimizer_config_file),
                               'optimizer_config')
    print(optimizer_config)
    # NOTE(review): `device` is not defined in this function — presumably a
    # module-level global; confirm.
    init_psi = torch.tensor([float(x.strip())
                             for x in init_psi.split(',')]).float().to(device)
    psi_dim = len(init_psi)
    # Propagate run-level sampling settings into the configs.
    model_config['psi_dim'] = psi_dim
    model_config['n_samples'] = n_samples
    model_config['n_samples_per_dim'] = n_samples_per_dim
    optimizer_config['x_step'] = step_data_gen

    optimized_function_cls = str_to_class(optimized_function)
    model_cls = str_to_class(model)
    optimizer_cls = str_to_class(optimizer)

    # Log every config entry, prefixed so model/optimizer keys cannot clash.
    experiment = Experiment(project_name=project_name, workspace=work_space)
    experiment.add_tags([x.strip() for x in tags.split(',')])
    experiment.log_parameter('model_type', model)
    experiment.log_parameter('optimizer_type', optimizer)
    experiment.log_parameters(
        {"model_{}".format(key): value
         for key, value in model_config.items()})
    experiment.log_parameters({
        "optimizer_{}".format(key): value
        for key, value in optimizer_config.items()
    })
    # NOTE(review): line_search_options entries share the "optimizer_" prefix
    # with top-level keys and could overwrite them on the comet side — verify.
    experiment.log_parameters({
        "optimizer_{}".format(key): value
        for key, value in optimizer_config.get('line_search_options',
                                               {}).items()
    })
    # experiment.log_asset("./gan_model.py", overwrite=True)
    # experiment.log_asset("./optim.py", overwrite=True)
    # experiment.log_asset("./train.py", overwrite=True)
    # experiment.log_asset("../model.py", overwrite=True)

    # The logger class wraps the comet.ml experiment.
    logger = str_to_class(logger)(experiment)

    end_to_end_training(epochs=epochs,
                        model_cls=model_cls,
                        optimizer_cls=optimizer_cls,
                        optimized_function_cls=optimized_function_cls,
                        logger=logger,
                        model_config=model_config,
                        optimizer_config=optimizer_config,
                        current_psi=init_psi,
                        n_samples_per_dim=n_samples_per_dim,
                        step_data_gen=step_data_gen,
                        n_samples=n_samples,
                        reuse_optimizer=reuse_optimizer,
                        reuse_model=reuse_model,
                        shift_model=shift_model,
                        finetune_model=finetune_model,
                        experiment=experiment)
Ejemplo n.º 5
0
def main(logger, optimized_function, optimizer, diff_scheme,
         optimizer_config_file, project_name, work_space, tags,
         num_repetitions, n, h, use_true_grad, init_psi, p):
    """Numerical-differences optimization loop, logged to comet.ml.

    ``logger``, ``optimized_function``, ``optimizer`` and ``diff_scheme`` are
    names resolved with ``str_to_class``. ``optimizer_config_file`` is a
    module name providing ``optimizer_config``. ``init_psi`` is a
    comma-separated string of floats. ``n``/``h`` parameterize the finite
    difference scheme; ``use_true_grad`` skips numerical differentiation.
    """
    device = torch.device('cpu')
    print("Using device = {}".format(device))
    # The config module must be importable from the working directory.
    optimizer_config = getattr(__import__(optimizer_config_file),
                               'optimizer_config')
    init_psi = torch.tensor([float(x.strip())
                             for x in init_psi.split(',')]).float().to(device)
    psi_dim = len(init_psi)

    optimized_function_cls = str_to_class(optimized_function)
    optimizer_cls = str_to_class(optimizer)
    diff_scheme_func = str_to_class(diff_scheme)

    # Log optimizer settings with an "optimizer_" prefix.
    experiment = Experiment(project_name=project_name, workspace=work_space)
    experiment.add_tags([x.strip() for x in tags.split(',')])
    experiment.log_parameter('optimizer_type', optimizer)
    experiment.log_parameters({
        "optimizer_{}".format(key): value
        for key, value in optimizer_config.items()
    })
    experiment.log_parameters({
        "optimizer_{}".format(key): value
        for key, value in optimizer_config.get('line_search_options',
                                               {}).items()
    })

    logger = str_to_class(logger)(experiment)
    y_model = optimized_function_cls(device=device, psi_init=init_psi)
    if use_true_grad:
        # The model provides analytical gradients itself.
        ndiff = y_model
    else:
        # Plain def instead of an assigned lambda (PEP 8 E731); behavior
        # unchanged.
        def grad_func(f, x, n, h):
            return compute_gradient_of_vector_function(
                f=f, x=x, n=n, h=h, scheme=diff_scheme_func)

        ndiff = NumericalDifferencesModel(y_model=y_model,
                                          psi_dim=psi_dim,
                                          y_dim=1,
                                          x_dim=1,
                                          n=n,
                                          h=h,
                                          num_repetitions=num_repetitions,
                                          grad_func=grad_func)

    # Run the optimizer one outer step at a time so performance can be
    # logged between steps.
    max_iters = optimizer_config['max_iters']
    optimizer_config['max_iters'] = 1
    optimizer_config['p'] = p
    optimizer = optimizer_cls(oracle=ndiff, x=init_psi, **optimizer_config)

    # Renamed from `iter`, which shadowed the builtin.
    for iteration in range(max_iters):
        current_psi, status, history = optimizer.optimize()
        print(current_psi)
        # if iteration % 10 == 0:
        logger.log_performance(y_sampler=y_model,
                               current_psi=current_psi,
                               n_samples=5000)
        # No-op on CPU, harmless; frees cached memory when CUDA is in use.
        torch.cuda.empty_cache()

    logger.log_optimizer(optimizer)
Ejemplo n.º 6
0
def train_hierarchy_extractor(
    data_path,
    bsz,
    num_workers,
    lr,
    weight_decay,
    warmup_updates,
    max_updates,
    accumulation_steps,
    validation_interval,
    seed,
    model_path,
    tag,
    device,
    save_metric,
    save_min,
):
    """Train a BERT-based hierarchy extractor, tracking the run on comet.ml.

    All arguments are forwarded to ``BertExtractorTrainer``; they are also
    mirrored onto the comet.ml experiment for reproducibility.
    """
    experiment = Experiment(project_name="information-retrieval",
                            auto_output_logging=False)
    experiment.add_tags([tag])
    parameters = {
        "bsz": bsz,
        "num_workers": num_workers,
        "lr": lr,
        "weight_decay": weight_decay,
        "warmup_updates": warmup_updates,
        "max_updates": max_updates,
        "validation_interval": validation_interval,
        "seed": seed,
        "model_path": model_path,
        "device": device,
        "accumulation_steps": accumulation_steps,
        "save_metric": save_metric,
        "save_min": save_min
    }
    experiment.log_parameters(parameters)
    # NOTE(review): the hard-coded 12 is presumably the number of encoder
    # layers/levels used by the extractor — confirm against BertHierarchyExtractor.
    model = BertHierarchyExtractor(model_path, 12, device)
    trainer = BertExtractorTrainer(
        experiment,
        model,
        data_path,
        model_path,
        bsz,
        num_workers,
        lr,
        weight_decay,
        warmup_updates,
        max_updates,
        accumulation_steps,
        validation_interval,
        save_metric,
        save_min,
        device,
        seed,
    )
    # The original `try: ... except Exception as e: raise e` wrapper was a
    # no-op (it re-raised everything unchanged) and has been removed.
    trainer.train()
Ejemplo n.º 7
0
def setup_comet_ml_experiment(api_key, project_name, experiment_name,
                              parameters, tags):
    """Create a named comet.ml experiment with manual metric logging.

    Args:
        api_key: comet.ml API key.
        project_name: target project on comet.ml.
        experiment_name: display name for the run.
        parameters: dict of hyperparameters to log.
        tags: list of tags to attach.

    Returns:
        The configured ``Experiment`` instance.
    """
    exp = Experiment(api_key=api_key,
                     project_name=project_name,
                     auto_metric_logging=False)
    exp.set_name(experiment_name)
    exp.log_parameters(parameters)
    exp.add_tags(tags)
    return exp
Ejemplo n.º 8
0
def main(logger, optimizer, optimized_function, optimizer_config_file,
         model_config_file, project_name, work_space, tags, reuse_optimizer,
         init_psi, epochs):
    """Void-model optimization loop logged to comet.ml.

    ``logger``, ``optimizer`` and ``optimized_function`` are class names
    resolved with ``str_to_class``; the ``*_config_file`` arguments are
    module names providing ``model_config`` / ``optimizer_config`` dicts.
    ``init_psi`` is a comma-separated string of floats.
    """
    # The config modules must be importable from the working directory.
    model_config = getattr(__import__(model_config_file), 'model_config')
    optimizer_config = getattr(__import__(optimizer_config_file),
                               'optimizer_config')
    # NOTE(review): `device` is not defined in this function — presumably a
    # module-level global; confirm.
    current_psi = torch.tensor([float(x.strip()) for x in init_psi.split(',')
                                ]).float().to(device)
    # Bug fix: measure the parsed tensor, not the raw comma-separated string
    # (len() of the string counted characters, not dimensions).
    psi_dim = len(current_psi)
    print(psi_dim, current_psi)
    optimized_function_cls = str_to_class(optimized_function)
    optimizer_cls = str_to_class(optimizer)

    # Log optimizer settings with an "optimizer_" prefix.
    experiment = Experiment(project_name=project_name, workspace=work_space)
    experiment.add_tags([x.strip() for x in tags.split(',')])
    experiment.log_parameters({
        "optimizer_{}".format(key): value
        for key, value in optimizer_config.items()
    })
    experiment.log_parameters({
        "optimizer_{}".format(key): value
        for key, value in optimizer_config.get('line_search_options',
                                               {}).items()
    })

    logger = str_to_class(logger)(experiment)
    y_model = optimized_function_cls(device=device, psi_init=current_psi)

    model = VoidModel(y_model=y_model, **model_config)
    optimizer = optimizer_cls(oracle=model, x=current_psi, **optimizer_config)

    for epoch in range(epochs):
        if reuse_optimizer:
            # Keep optimizer state, only move its anchor point.
            optimizer.update(oracle=model, x=current_psi)
        else:
            # find new psi
            optimizer = optimizer_cls(oracle=model,
                                      x=current_psi,
                                      **optimizer_config)

        current_psi, status, history = optimizer.optimize()
        try:
            logger.log_optimizer(optimizer)
            logger.log_grads(model,
                             y_sampler=y_model,
                             current_psi=current_psi,
                             num_repetitions=5000)
            logger.log_performance(y_sampler=y_model,
                                   current_psi=current_psi,
                                   n_samples=5000)

        except Exception as e:
            # Logging failures abort the run, but the error is printed first
            # so the root cause is visible in the console output.
            print(e)
            raise
Ejemplo n.º 9
0
class CometMLMonitor(MonitorBase):
    """
    Send data to https://www.comet.ml.

    Note:
        1. comet_ml requires you to `import comet_ml` before importing tensorflow or tensorpack.
        2. The "automatic output logging" feature of comet_ml will make the training progress bar appear to freeze.
           Therefore the feature is disabled by default.
    """
    def __init__(self, experiment=None, api_key=None, tags=None, **kwargs):
        """
        Args:
            experiment (comet_ml.Experiment): if provided, invalidate all other arguments
            api_key (str): your comet.ml API key
            tags (list[str]): experiment tags
            kwargs: other arguments passed to :class:`comet_ml.Experiment`.
        """
        if experiment is not None:
            # An externally managed experiment: no other options may be set.
            self._exp = experiment
            assert api_key is None and tags is None and len(kwargs) == 0
        else:
            from comet_ml import Experiment
            kwargs.setdefault(
                'log_code', True
            )  # though it's not functioning, git patch logging requires it
            # None disables comet's stdout capture (see class docstring note 2).
            kwargs.setdefault('auto_output_logging', None)
            self._exp = Experiment(api_key=api_key, **kwargs)
            if tags is not None:
                self._exp.add_tags(tags)

        self._exp.set_code(
            "Code logging is impossible because there are too many files ...")
        self._exp.log_dependency('tensorpack', __git_version__)

    @property
    def experiment(self):
        """
        The :class:`comet_ml.Experiment` instance.
        """
        return self._exp

    def _before_train(self):
        # Upload the TF graph once, at training start.
        self._exp.set_model_graph(tf.get_default_graph())

    @HIDE_DOC
    def process_scalar(self, name, val):
        # Forward every scalar summary to comet.ml at the current global step.
        self._exp.log_metric(name, val, step=self.global_step)

    def _after_train(self):
        # Mark the experiment finished so comet.ml stops expecting data.
        self._exp.end()

    def _after_epoch(self):
        self._exp.log_epoch_end(self.epoch_num)
Ejemplo n.º 10
0
    def _train_with_comet(self, train_dataset, val_dataset):
        """Run one training session wrapped in a comet.ml experiment.

        Logs the dataset hash and identifying tags, trains via
        ``self.fit_dataset``, then closes the experiment.

        Returns:
            The training history returned by ``fit_dataset``.
        """
        experiment = Experiment(api_key=self.api_key,
                                project_name=self.project_name,
                                workspace=self.workspace)
        experiment.log_dataset_hash(train_dataset)
        run_tags = [
            str(self.architecture),
            self.name,
            f"nb_labels_{self.label_encoder_classes_number}",
        ]
        experiment.add_tags(run_tags)
        with experiment.train():
            history = self.fit_dataset(train_dataset, val_dataset)
        experiment.end()

        return history
Ejemplo n.º 11
0
def make_experiment(env_file, name=None, tags=None):
    """Build a comet.ml Experiment configured from a dotenv file.

    Args:
        env_file: path to a .env file defining COMETML_KEY and COMETML_PROJECT.
        name: optional display name for the experiment.
        tags: optional list of tags.

    Returns:
        The configured ``Experiment``.
    """
    # Pull credentials from the environment file.
    load_dotenv(env_file)
    api_key = os.environ.get("COMETML_KEY")
    project = os.environ.get("COMETML_PROJECT")

    experiment = Experiment(api_key, project)
    if name is not None:
        experiment.set_name(name)
    if tags is not None:
        experiment.add_tags(tags)

    return experiment
Ejemplo n.º 12
0
class Logger:
    """comet.ml-backed metric logger; a no-op when ``send_logs`` is False."""

    def __init__(self, send_logs, tags, parameters):
        """
        Args:
            send_logs: if falsy, every method becomes a no-op.
            tags: optional list of tags for the experiment.
            parameters: optional dict of hyperparameters to log.
        """
        self.send_logs = send_logs
        self.sent_mb = 0
        # The two separate `if self.send_logs:` blocks of the original were
        # merged — nothing in between depended on the split.
        if self.send_logs:
            # NOTE(review): hardcoded API key should move to an env var/config.
            self.experiment = Experiment(api_key="OZwyhJHyqzPZgHEpDFL1zxhyI",
                                         project_name="drilling-the-hole",
                                         workspace="wwydmanski")
            if tags is not None:
                self.experiment.add_tags(tags)
            if parameters is not None:
                self.experiment.log_parameters(parameters)

    def begin_logging(self, episode_count, steps_per_ep):
        """Record the run-level episode/step configuration."""
        if self.send_logs:
            self.experiment.log_parameter("Episode count", episode_count)
            self.experiment.log_parameter("Max steps per episode",
                                          steps_per_ep)

    def log_round(self, actions, reward, cumulative_reward, angle, loss, step):
        """Log per-step metrics: reward, both actions, angle and losses."""
        if self.send_logs:
            self.experiment.log_metric("Round reward", reward, step=step)
            self.experiment.log_metric("Per-ep reward",
                                       cumulative_reward,
                                       step=step)
            self.experiment.log_metric("Action 1", actions[0], step=step)
            self.experiment.log_metric("Action 2", actions[1], step=step)
            self.experiment.log_metric("Current angle", angle, step=step)

            self.experiment.log_metrics(loss, step=step)

    def log_episode(self, cumulative_reward, state, step):
        """Log end-of-episode state and cumulative reward."""
        if self.send_logs:
            self.experiment.log_metric("Angle", state[0], step=step)
            self.experiment.log_metric("Goal", state[1], step=step)
            self.experiment.log_metric("Cumulative reward",
                                       cumulative_reward,
                                       step=step)

    def end(self):
        """Finish the comet.ml experiment, if one was started."""
        if self.send_logs:
            self.experiment.end()
Ejemplo n.º 13
0
class CometTracker:
    """Small convenience wrapper around a comet.ml ``Experiment``."""

    def __init__(self, comet_params, experiment_name=None, run_params=None):
        """
        Args:
            comet_params: kwargs forwarded verbatim to ``Experiment``.
            experiment_name: optional display name for the run.
            run_params: optional dict of hyperparameters to log.
        """
        exp = Experiment(**comet_params)
        self.experiment = exp
        if run_params is not None:
            exp.log_parameters(run_params)
        if experiment_name is not None:
            exp.set_name(experiment_name)

    def track_metric(self, metric, value, step=None):
        """Log one metric value, optionally at a specific step."""
        self.experiment.log_metric(metric, value, step)

    def add_tags(self, tags):
        """Attach tags to the experiment."""
        self.experiment.add_tags(tags)
        print(f'In [add_tags]: Added these tags to the new experiment: {tags}')

    def set_name(self, name):
        """Set the experiment's display name."""
        self.experiment.set_name(name)
Ejemplo n.º 14
0
def log_hyperparameters_to_comet(clf, experiment):
    """Log every CV candidate of a fitted search as its own comet.ml run.

    One child experiment is created per parameter combination in
    ``clf.cv_results_``; afterwards the global comet logger is re-attached
    to the original experiment.
    """
    n_candidates = len(clf.cv_results_["params"])
    for idx in range(n_candidates):
        trial = Experiment(
            workspace="s0lvang",
            project_name="ideal-pancake-hyperparameter",
            api_key=globals.flags.comet_api_key,
        )
        trial.add_tag("hp_tuning")
        trial.add_tags(globals.comet_logger.get_tags())
        # cv_results_ is a dict of parallel arrays indexed by candidate.
        for key, values in clf.cv_results_.items():
            if key == "params":
                trial.log_parameters(values[idx])
            else:
                trial.log_metric(key, values[idx])
        trial.end()

    # Re-open the parent experiment so subsequent logging targets it again.
    old_experiment = ExistingExperiment(
        api_key=globals.flags.comet_api_key,
        previous_experiment=experiment.get_key(),
    )
    globals.comet_logger = old_experiment
Ejemplo n.º 15
0
def run_main_loop(args, train_estimator, predict_estimator):
	"""Alternate training and prediction for ``args.epochs`` epochs.

	Trains ``args.predict_every`` epochs at a time, then generates and saves
	predictions. When ``args.use_comet`` is set, the run is tracked in a
	comet.ml experiment (its step is advanced each outer iteration).
	"""
	total_steps = 0
	# Steps per pass over each split, derived from example counts.
	train_steps = math.ceil(args.train_examples / args._batch_size)
	eval_steps  = math.ceil(args.eval_examples  / args._batch_size)

	if args.use_comet:
		experiment = Experiment(api_key=comet_ml_api_key, project_name=comet_ml_project, workspace=comet_ml_workspace)
		experiment.log_parameters(vars(args))
		experiment.add_tags(args.tag)
		experiment.set_name(model_name(args))
	else:
		experiment = None

	# Warm the inception model cache before the loop needs it.
	prefetch_inception_model()

	with tf.gfile.Open(os.path.join(suffixed_folder(args, args.result_dir), "eval.txt"), "a") as eval_file:
		for epoch in range(0, args.epochs, args.predict_every):

			logger.info(f"Training epoch {epoch}")
			train_estimator.train(input_fn=train_input_fn, steps=train_steps * args.predict_every)
			total_steps += train_steps * args.predict_every

			if args.use_comet:
				experiment.set_step(epoch)

			# logger.info(f"Evaluate {epoch}")
			# evaluation = predict_estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps)
			# logger.info(evaluation)
			# save_evaluation(args, eval_file, evaluation, epoch, total_steps)

			# if args.use_comet:
			# 	experiment.log_metrics(evaluation)

			logger.info(f"Generate predictions {epoch}")
			predictions = predict_estimator.predict(input_fn=predict_input_fn)

			logger.info(f"Save predictions")
			save_predictions(args, suffixed_folder(args, args.result_dir), eval_file, predictions, epoch, total_steps, experiment)

	logger.info(f"Completed {args.epochs} epochs")
Ejemplo n.º 16
0
def train(args):
    """Train a macgraph estimator with optional comet.ml tracking.

    ``args`` is a dict-like of CLI options; it is saved to disk first so the
    exact configuration of successful runs is preserved.
    """

    # So I don't frigging forget what caused working models
    save_args(args)

    if args["use_tf_debug"]:
        hooks = [tf_debug.LocalCLIDebugHook()]
    else:
        hooks = []

    if args["use_comet"]:
        # Add the following code anywhere in your machine learning file
        experiment = Experiment(api_key="bRptcjkrwOuba29GcyiNaGDbj",
                                project_name="macgraph",
                                workspace="davidhughhenrymack")
        # NOTE(review): log_multiple_params is a deprecated comet_ml API
        # (replaced by log_parameters) — confirm the pinned version supports it.
        experiment.log_multiple_params(args)

        if len(args["tag"]) > 0:
            experiment.add_tags(args["tag"])

    # Count records up front to report dataset size.
    train_size = sum(
        1 for _ in tf.python_io.tf_record_iterator(args["train_input_path"]))
    logger.info(f"Training on {train_size} records")

    # ----------------------------------------------------------------------------------

    estimator = get_estimator(args)

    # max_steps is expressed in thousands on the CLI; None means unbounded.
    train_spec = tf.estimator.TrainSpec(
        input_fn=gen_input_fn(args, "train"),
        max_steps=args["train_max_steps"] *
        1000 if args["train_max_steps"] is not None else None,
        hooks=hooks)

    eval_spec = tf.estimator.EvalSpec(input_fn=gen_input_fn(args, "eval"),
                                      throttle_secs=args["eval_every"])

    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
Ejemplo n.º 17
0
 def log(self, experiment=None):
     ''' Export all logs to the Comet.ml environment.

         Logs hyperparameters, aggregate accuracy metrics, per-pilot
         loss/confusion-matrix figures and saved model weights to a
         comet.ml experiment (created here unless one is passed in).
         See https://www.comet.ml/ for more details.
     '''

     # Initialize Comet.ml experience (naming, tags) for automatic logging
     project_name = 'Optimization' if self.comet_optimize else 'Summary'
     experiment_name = '{} - {} '.format(self.model_name, str(self.batch_size)) + ('ES+' if self.train_after_es else '')
     experiment_tags = [ self.model_name, self.monitor_val ] + (['ES+'] if self.train_after_es else []) +  (['Pre-train'] if self.pretraining else [])

     # NOTE(review): `experiment == None` should be `experiment is None`.
     if experiment == None:
         experiment = Experiment(api_key='cSZq9kuH2I87ezvm2dEWTx6op', project_name=project_name, log_code=False, auto_param_logging=False, auto_metric_logging=False)
     experiment.set_name(experiment_name)
     experiment.add_tags(experiment_tags)

     # Export hyperparameters
     experiment.log_parameters(self.dataloader_params)
     experiment.log_parameters(self.training_params)   

     # Export metrics values
     experiment.log_metrics({'Average accuracy' : np.mean(self.test_score['accuracy']), 'Std accuracy' : np.std(self.test_score['accuracy'])})

     # Export metrics graphs for each pilot (accuracy, loss, confusion matrix)
     [ experiment.log_figure(figure_name='Confusion matrix {}'.format(pilot_idx), figure=plot_cm(self.conf_matrices, pilot_idx)) for pilot_idx in range(1,self.n_pilots+1)]
     [ experiment.log_figure(figure_name='Loss pilot {}'.format(pilot_idx), figure=plot_loss(self.histories[pilot_idx-1], pilot_idx)) for pilot_idx in range(1,self.n_pilots+1)]

     fig, ax = plt.subplots(figsize=(10,6))
     plot_full_barchart(self.test_score, n_pilots=self.n_pilots, title=' {} ConvNet model'.format(self.model_name), fig=fig)
     experiment.log_figure(figure_name='Accuracy barchart', figure=fig)

     # Extra loss curves exist only when training continued past early stopping
     if self.train_after_es:
         [ experiment.log_figure(figure_name='Loss pilot {} (ES+)'.format(pilot_idx), figure=plot_loss(self.histories_es[pilot_idx-1], pilot_idx)) for pilot_idx in range(1,self.n_pilots+1)]

     # Export model weights for each pilot
     [ experiment.log_asset('{}{}.h5'.format(self.weights_savename_prefix, pilot_idx)) for pilot_idx in range(1,self.n_pilots+1)]
     experiment.end()
class CometML:
    """Minimal logging facade over a comet.ml ``Experiment``."""

    def __init__(self,
                 api_key,
                 project_name,
                 workspace,
                 debug=True,
                 tags=None):
        """Connect to comet.ml; ``debug=True`` disables the experiment."""
        self._exp = Experiment(
            api_key=api_key,
            project_name=project_name,
            workspace=workspace,
            disabled=debug,
        )
        # A dead connection is only acceptable while debugging.
        if not self._exp.alive and not debug:
            raise RuntimeError("Cannot connect to Comet ML")
        self._exp.disable_mp()

        if tags is not None:
            self._exp.add_tags(tags)

    @property
    def run_name(self):
        """Unique experiment key used as the run identifier."""
        return self._exp.get_key()

    def args(self, arg_text):
        """Record the raw command-line string."""
        self._exp.log_parameter("cmd args", arg_text)

    def meta(self, params):
        """Record a dict of hyperparameters."""
        self._exp.log_parameters(params)

    def log(self, name, value, step):
        """Record one scalar metric at the given step."""
        self._exp.log_metric(
            name=name,
            value=value,
            step=step,
        )
Ejemplo n.º 19
0
class _Experiment(object):
    """Driver for an image-captioning/NLI experiment configured by a JSON file.

    Supports two model families selected by the config: a (conditional)
    VAE sequence model and a BERT classifier.  Metrics are mirrored to
    Comet ML when ``comet.log_comet`` is enabled in the config.
    """

    def __init__(self, name):
        # ``name`` is the config file stem: ``./<name>.json``.
        config_data = read_file_in_dir('./', name + '.json')

        if config_data is None:
            raise Exception("Configuration file doesn't exist: ", name)

        self.name = config_data['experiment_name']

        # --- dataset section ---
        dataset_config = config_data['dataset']
        data_percentage = dataset_config['data_percentage']
        batch_size = dataset_config['batch_size']
        num_workers = dataset_config['num_workers']
        data_files = dataset_config['data_file_path']

        # --- experiment section ---
        experiment_config = config_data['experiment']
        self.epochs = experiment_config['num_epochs']
        learning_rate = experiment_config['learning_rate']

        # --- model section ---
        model_config = config_data['model']
        self.is_vae = model_config['is_vae']
        hidden_size = model_config['hidden_size']
        embedding_size = model_config['embedding_size']
        self.is_variational = model_config['is_variational']
        self.is_conditional = model_config['is_conditional']
        class_label = model_config['class_label']

        # --- generation section ---
        generation_config = config_data['generation']
        max_length = generation_config['max_length']
        # Tuple-index trick: deterministic=True (1) -> "Deterministic",
        # False (0) -> "Stochastic".
        prediction_type = ("Stochastic",
                           "Deterministic")[generation_config["deterministic"]]
        temperature = generation_config['temperature']

        self.experiment_dir = os.path.join(ROOT_STATS_DIR, self.name)
        self.log_comet = config_data['comet']['log_comet']

        if self.log_comet:
            # NOTE(review): credentials are hard-coded in source; this API
            # key/workspace should be moved to the config or environment.
            self.experiment = Experiment(
                api_key="CaoCCUZVjE0gXKA3cbtMKSSKL",
                project_name="image-captioning-251b",
                workspace="keshav919",
            )

        # Load Datasets
        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
        self.train_loader, self.val_loader, self.test_loader = getDataloaders(
            data_files,
            max_length,
            batch_size,
            num_workers,
            tokenizer,
            data_percentage,
            class_label=class_label)

        # Setup Experiment — per-epoch metric histories.
        self.current_epoch = 0
        self.training_losses = []
        self.bleu1_t = []  # Geeling: log bleu scores (train)
        self.bleu4_t = []
        self.bleu1_v = []  # Keshav: log bleu scores (validation)
        self.bleu4_v = []
        self.val_losses = []
        self.best_loss = float('inf')
        self.best_model = None  # Save your best model in this field and use this in test method.

        # NOTE(review): ``config_data['generation']`` is a dict, so this is
        # truthy whenever the section exists — presumably always True here.
        if config_data['generation']:
            tags = [self.name, self.is_variational, prediction_type]
            hyper_params = {
                "Epochs": self.epochs,
                "Batch Size": batch_size,
                "Learning Rate": learning_rate,
                "Hidden Size": hidden_size,
                "Embedding Size": embedding_size,
                "Max Length": max_length,
                "Temperature": temperature
            }

            if self.log_comet:
                self.experiment.add_tags(tags)
                self.experiment.log_parameters(hyper_params)

        # Initialize Model
        self.tokenizer = tokenizer
        self.vocab_size = tokenizer.vocab_size
        self.model = getModel(config_data, self.vocab_size)

        # TODO: need to add KL divergence
        self.criterion = torch.nn.CrossEntropyLoss()
        self.optimizer = optim.Adam(self.model.parameters(), lr=learning_rate)

        self.init_model()

        self.load_experiment()  # Load Experiment Data if available

    def load_experiment(self):
        """Prepare the on-disk directories used by this experiment.

        Since metrics are logged to Comet rather than locally, only the
        stats root and this run's own output directory are needed.
        """
        for directory in (ROOT_STATS_DIR, self.experiment_dir):
            os.makedirs(directory, exist_ok=True)

    def init_model(self):
        """
            Gets GPU ready to use
        """
        self.device = torch.device(
            "cuda" if torch.cuda.is_available() else "cpu")
        self.model.to(self.device)
        self.criterion.to(self.device)

    def loss_function(self, raw_outputs, hypothesis, mu, logvar):
        ceLoss = self.criterion(raw_outputs, hypothesis)
        if self.is_variational:
            klLoss = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
            klLoss /= len(raw_outputs)

            ceLoss += klLoss

        return ceLoss

    def run(self):
        """Train for the configured number of epochs, then run the test pass.

        Each epoch trains and validates either the VAE or the BERT model,
        mirrors the metrics to Comet when enabled, and checkpoints the
        model whenever validation loss improves.
        """
        start_epoch = self.current_epoch
        for epoch in range(
                start_epoch,
                self.epochs):  # loop over the dataset multiple times
            # NOTE(review): start_time is currently unused — log_epoch_stats
            # is never invoked from this loop.
            start_time = datetime.now()
            self.current_epoch = epoch

            if self.is_vae:
                ########################## VAE ##############################
                train_loss, bleu1_scores_t, bleu4_scores_t = self.train_vae()
                if self.log_comet:
                    self.experiment.log_metrics({'Train_Loss': train_loss},
                                                epoch=epoch)
                    self.experiment.log_metrics(
                        {'Train_Metric/BLEU-1': bleu1_scores_t}, epoch=epoch)
                    self.experiment.log_metrics(
                        {'Train_Metric/BLEU-4': bleu4_scores_t}, epoch=epoch)

                val_loss, bleu1_scores_v, bleu4_scores_v = self.val_vae()
                if self.log_comet:
                    self.experiment.log_metrics({'Val_Loss': val_loss},
                                                epoch=epoch)
                    self.experiment.log_metrics(
                        {'Val_Metric/BLEU-1': bleu1_scores_v}, epoch=epoch)
                    self.experiment.log_metrics(
                        {'Val_Metric/BLEU-4': bleu4_scores_v}, epoch=epoch)
            else:
                ########################## BERT ##############################
                train_loss, train_accu = self.train_bert()
                if self.log_comet:
                    self.experiment.log_metrics({'Train_Loss': train_loss},
                                                epoch=epoch)
                    self.experiment.log_metrics({'Train_Accu': train_accu},
                                                epoch=epoch)

                val_loss, val_accu = self.val_bert()
                if self.log_comet:
                    self.experiment.log_metrics({'Val_Loss': val_loss},
                                                epoch=epoch)
                    self.experiment.log_metrics({'Val_Accu': val_accu},
                                                epoch=epoch)

            # Checkpoint the best model so far.  (No actual early *stopping*
            # happens here — training always runs all epochs.)
            if val_loss < self.best_loss:
                self.best_loss = val_loss
                # Fix: ensure the checkpoint directory exists before saving;
                # torch.save fails if './saved_models' is missing.
                os.makedirs('./saved_models', exist_ok=True)
                torch.save(self.model, './saved_models/{}'.format(self.name))

        if self.is_vae:
            self.test_vae()
        else:
            self.test_bert()

########################## VAE ##############################

    def train_vae(self):
        """Run one training epoch of the (conditional) VAE sequence model.

        Returns a 3-tuple: (mean training loss per batch, mean BLEU-1,
        mean BLEU-4).  BLEU is only computed on every ``print_iter``-th
        batch, so the BLEU means are over those sampled batches only.

        NOTE(review): assumes the train loader yields at least one batch —
        otherwise ``i`` is undefined and ``if_counter`` is zero at return.
        """
        self.model.train()
        training_loss = 0
        bleu1_scores = 0.0
        bleu4_scores = 0.0
        print_iter = 150  # how often to generate samples and score BLEU
        if_counter = 0  # number of batches on which BLEU was computed

        for i, (prem, hyp, _, _, lab) in enumerate(self.train_loader):
            self.model.zero_grad()

            prem = prem.long().to(self.device)
            hyp = hyp.long().to(self.device)
            lab = lab.to(self.device)

            # Forward pass — generation is skipped except on sampled batches
            # to keep the epoch fast.
            preds, raw_outputs, mu, logvar = self.model(
                prem,
                hyp,
                lab,
                self.device,
                is_teacher_forcing_on=True,
                skip_generation=i % print_iter != 0,
                is_conditional=self.is_conditional)

            # Calculate loss and perform backprop.  Outputs/targets are
            # shifted by one token to drop the <start> position; permute
            # puts the vocab dim where CrossEntropyLoss expects it.
            loss = self.loss_function(raw_outputs[:, 1:].permute(0, 2, 1),
                                      hyp[:, 1:], mu, logvar)
            loss.backward()
            self.optimizer.step()

            # Log the training loss
            training_loss += loss.item()

            # View deterministic predictions on sampled batches only
            if i % print_iter == 0:
                if_counter += 1

                # Get the sentence without the <start> and <end> and other tags
                clean_premise_text, _ = clean_caption(prem, self.tokenizer)
                clean_preds_text, _ = clean_caption(preds, self.tokenizer)
                clean_targets_text, _ = clean_caption(hyp, self.tokenizer)

                # Calculate bleu scores (normalized per sentence in batch)
                b1 = calculate_bleu(bleu1, clean_preds_text,
                                    clean_targets_text)
                b4 = calculate_bleu(bleu4, clean_preds_text,
                                    clean_targets_text)
                bleu1_scores += (b1 / len(clean_preds_text))
                bleu4_scores += (b4 / len(clean_preds_text))

                print(self.current_epoch, i, ": ------ TRAIN ------")
                print("------ Actual Premise ------")
                print(clean_premise_text[0])
                print("------ Actual Hypothesis ------")
                print(clean_targets_text[0])
                print("------ Predicted Hypothesis ------")
                print(clean_preds_text[0])
                print()

        return training_loss / (
            i + 1), bleu1_scores / if_counter, bleu4_scores / if_counter

    def val_vae(self):
        """Run one validation epoch of the VAE model (no gradient updates).

        Mirrors ``train_vae``: returns (mean val loss per batch, mean
        BLEU-1, mean BLEU-4), with BLEU sampled every ``print_iter``
        batches only.

        NOTE(review): assumes the val loader yields at least one batch.
        """
        self.model.eval()
        val_loss = 0
        bleu1_scores = 0.0
        bleu4_scores = 0.0
        print_iter = 150  # how often to generate samples and score BLEU
        if_counter = 0  # number of batches on which BLEU was computed

        with torch.no_grad():
            for i, (prem, hyp, _, _, lab) in enumerate(self.val_loader):
                prem = prem.long().to(self.device)
                hyp = hyp.long().to(self.device)
                lab = lab.to(self.device)

                # Forward pass (teacher forcing kept on, as in training)
                preds, raw_outputs, mu, logvar = self.model(
                    prem,
                    hyp,
                    lab,
                    self.device,
                    is_teacher_forcing_on=True,
                    skip_generation=i % print_iter != 0,
                    is_conditional=self.is_conditional)

                # Calculate loss (no backprop during validation)
                loss = self.loss_function(raw_outputs[:, 1:].permute(0, 2, 1),
                                          hyp[:, 1:], mu, logvar)

                # Accumulate the validation loss
                val_loss += loss.item()

                # View deterministic predictions on sampled batches only
                if i % print_iter == 0:
                    if_counter += 1

                    # Get the sentence without the <start> and <end> and other tags
                    clean_premise_text, _ = clean_caption(prem, self.tokenizer)
                    clean_preds_text, _ = clean_caption(preds, self.tokenizer)
                    clean_targets_text, _ = clean_caption(hyp, self.tokenizer)

                    # Calculate bleu scores (normalized per sentence in batch)
                    b1 = calculate_bleu(bleu1, clean_preds_text,
                                        clean_targets_text)
                    b4 = calculate_bleu(bleu4, clean_preds_text,
                                        clean_targets_text)
                    bleu1_scores += (b1 / len(clean_preds_text))
                    bleu4_scores += (b4 / len(clean_preds_text))

                    print(self.current_epoch, i, ": ------ VALIDATION ------")
                    print("------ Actual Premise ------")
                    print(clean_premise_text[0])
                    print("------ Actual Hypothesis ------")
                    print(clean_targets_text[0])
                    print("------ Predicted Hypothesis ------")
                    print(clean_preds_text[0])
                    print()

        return val_loss / (
            i + 1), bleu1_scores / if_counter, bleu4_scores / if_counter

    def test_vae(self):
        """Evaluate the best saved VAE checkpoint on the test set.

        Reloads the best model written by ``run``, computes loss with
        teacher forcing and generations without it, scores BLEU on every
        batch, and writes all generations to ``predicted_vae<name>.csv``.

        Returns (mean test loss, mean BLEU-1, mean BLEU-4) per batch.
        """
        # Reload the best checkpoint saved during training.
        self.model = torch.load('./saved_models/{}'.format(self.name))
        self.model.eval()
        test_loss = 0
        bleu1_scores = 0.0
        bleu4_scores = 0.0
        print_iter = 150  # how often to print sample generations

        # Accumulators for the CSV export of generated hypotheses.
        predicted = []
        premises = []
        labels = []
        language = []

        with torch.no_grad():
            for i, (prem, hyp, _, _, lab) in enumerate(self.test_loader):
                prem = prem.long().to(self.device)
                hyp = hyp.long().to(self.device)
                lab = lab.to(self.device)

                # Two forward passes: teacher-forced for the loss, free-running
                # for the generations that get scored/exported.
                _, raw_outputs, mu, logvar = self.model(
                    prem,
                    hyp,
                    lab,
                    self.device,
                    is_teacher_forcing_on=True,
                    is_conditional=self.is_conditional)
                preds, _, _, _ = self.model(prem,
                                            hyp,
                                            lab,
                                            self.device,
                                            is_teacher_forcing_on=False,
                                            is_conditional=self.is_conditional)

                # Calculate loss (outputs/targets shifted past <start>)
                loss = self.loss_function(raw_outputs[:, 1:].permute(0, 2, 1),
                                          hyp[:, 1:], mu, logvar)

                # Accumulate the test loss
                test_loss += loss.item()

                # Get the sentence without the <start> and <end> and other tags
                clean_premise_text, clean_premise_joined = clean_caption(
                    prem, self.tokenizer)
                clean_preds_text, clean_preds_joined = clean_caption(
                    preds, self.tokenizer)
                clean_targets_text, _ = clean_caption(hyp, self.tokenizer)

                predicted = predicted + clean_preds_joined
                premises = premises + clean_premise_joined
                labels = labels + lab.tolist()
                language = language + ['en'] * len(clean_premise_joined)

                # Calculate bleu scores on every batch (unlike train/val)
                b1 = calculate_bleu(bleu1, clean_preds_text,
                                    clean_targets_text)
                b4 = calculate_bleu(bleu4, clean_preds_text,
                                    clean_targets_text)
                bleu1_scores += (b1 / len(clean_preds_text))
                bleu4_scores += (b4 / len(clean_preds_text))

                if i % print_iter == 0:
                    print(i, ": ------ TEST ------")
                    print("------ Actual Premise ------")
                    print(clean_premise_text[0])
                    print("------ Actual Hypothesis ------")
                    print(clean_targets_text[0])
                    print("------ Predicted Hypothesis ------")
                    print(clean_preds_text[0])
                    print()

        # Average per batch; assumes the test loader was non-empty.
        bleu1_scores = bleu1_scores / (i + 1)
        bleu4_scores = bleu4_scores / (i + 1)
        test_loss = test_loss / (i + 1)

        result_str = "Test Performance: Loss: {}, Bleu1: {}, Bleu4: {}".format(
            test_loss, bleu1_scores, bleu4_scores)
        self.log(result_str)

        if self.log_comet and self.is_vae:
            self.experiment.log_metrics({'Test_Loss': test_loss})
            self.experiment.log_metrics({'Test_Metric/BLEU-1': bleu1_scores})
            self.experiment.log_metrics({'Test_Metric/BLEU-4': bleu4_scores})

        # Export all generations for downstream analysis.
        ds = {
            "premise": premises,
            "hypothesis": predicted,
            "label": labels,
            "lang_abv": language
        }
        df = pd.DataFrame(
            ds, columns=["premise", "hypothesis", "label", "lang_abv"])
        df.to_csv("predicted_vae" + self.name + ".csv", index=False)

        return test_loss, bleu1_scores, bleu4_scores

########################## BERT ##############################

    def train_bert(self):
        """Run one training epoch of the BERT sequence classifier.

        Premise and hypothesis token ids (and their attention masks) are
        concatenated into a single input sequence.  Returns (mean training
        loss per batch, cumulative accuracy over the whole epoch).

        NOTE(review): assumes the train loader yields at least one batch —
        otherwise ``i``/``accu`` are undefined at the return.
        """
        self.model.train()
        training_loss = 0
        print_iter = 50  # how often to print running predictions
        total_pred = 0
        correct_pred = 0

        for i, (prem_id, hyp_id, prem_att_mask, hypo_att_mask,
                lab) in enumerate(self.train_loader):
            self.model.zero_grad()

            # Push data to GPU
            # (the model itself is moved to the device in init_model)
            prem_id = prem_id.long().to(self.device)
            hyp_id = hyp_id.long().to(self.device)
            prem_att_mask = prem_att_mask.long().to(self.device)
            hypo_att_mask = hypo_att_mask.long().to(self.device)
            lab = lab.long().to(self.device)

            # Prepare data as inputs to bert: one concatenated sequence
            input_ids = torch.cat((prem_id, hyp_id), dim=1).long()
            attention_mask = torch.cat((prem_att_mask, hypo_att_mask),
                                       dim=1).long()
            label = lab.unsqueeze(1).long()

            # Forward pass (the model computes the loss from ``labels``)
            outputs = self.model(input_ids,
                                 attention_mask=attention_mask,
                                 labels=label)

            # Calculate loss and perform backprop
            loss = outputs.loss
            loss.backward()
            self.optimizer.step()

            # Log the training loss
            training_loss += loss.item()

            # Get predicted labels
            predicted = torch.argmax(outputs.logits, 1)

            # calculate train accuracy = correct predictions/total predictions
            # (cumulative over the whole epoch, not per batch)
            for j in range(len(lab)):
                total_pred += 1
                if lab[j] == predicted[j]:
                    correct_pred += 1
            accu = correct_pred / total_pred

            # View deterministic predictions
            if i % print_iter == 0:
                print(self.current_epoch, i, ": ------ TRAIN ------")
                print("------ Actual Labels ------")
                print(lab)
                print("------ Predicted Labels ------")
                print(predicted)
                print("Current training accuracy: ", accu)

            # debugging code
            # if i==print_iter+1:
            #     print("i is: ", i)
            #     sys.exit()

        return training_loss / (i + 1), accu

    def val_bert(self):
        """Run one validation epoch of the BERT classifier (no grad updates).

        Mirrors ``train_bert``: returns (mean validation loss per batch,
        cumulative accuracy over the whole epoch).
        """
        self.model.eval()
        val_loss = 0
        print_iter = 50  # how often to print running predictions
        total_pred = 0
        correct_pred = 0

        with torch.no_grad():
            for i, (prem_id, hyp_id, prem_att_mask, hypo_att_mask,
                    lab) in enumerate(self.val_loader):
                # NOTE(review): zero_grad is redundant inside no_grad/eval.
                self.model.zero_grad()

                # Push data to GPU
                prem_id = prem_id.long().to(self.device)
                hyp_id = hyp_id.long().to(self.device)
                prem_att_mask = prem_att_mask.long().to(self.device)
                hypo_att_mask = hypo_att_mask.long().to(self.device)
                lab = lab.long().to(self.device)

                # Prepare data as inputs to bert: one concatenated sequence
                input_ids = torch.cat((prem_id, hyp_id), dim=1)
                attention_mask = torch.cat((prem_att_mask, hypo_att_mask),
                                           dim=1)
                label = lab.unsqueeze(1)

                # Forward pass (the model computes the loss from ``labels``)
                outputs = self.model(input_ids,
                                     attention_mask=attention_mask,
                                     labels=label)

                # Calculate loss (no backprop during validation)
                loss = outputs.loss

                # Log the val loss
                val_loss += loss.item()

                # Get predicted labels
                predicted = torch.argmax(outputs.logits, 1)

                # calculate val accuracy = correct predictions/total predictions
                for j in range(len(lab)):
                    total_pred += 1
                    if lab[j] == predicted[j]:
                        correct_pred += 1
                accu = correct_pred / total_pred

                # View deterministic predictions
                if i % print_iter == 0:
                    print(self.current_epoch, i, ": ------ VAL ------")
                    print("------ Actual Labels ------")
                    print(lab)
                    print("------ Predicted Labels ------")
                    print(predicted)
                    print("Current validation accuracy: ", accu)

        return val_loss / (i + 1), accu

    def test_bert(self):
        """Evaluate the BERT classifier on the test set.

        Returns (mean test loss per batch, final cumulative accuracy).

        NOTE(review): unlike ``test_vae``, this evaluates the *current*
        in-memory weights rather than reloading the best checkpoint —
        confirm this is intended.
        """
        self.model.eval()
        test_loss = 0
        print_iter = 50  # how often to print running predictions
        total_pred = 0
        correct_pred = 0

        # Fix: inference must not build autograd graphs — the original loop
        # ran without no_grad(), wasting memory and time.  The redundant
        # per-batch model.zero_grad() is dropped for the same reason.
        with torch.no_grad():
            for i, (prem_id, hyp_id, prem_att_mask, hypo_att_mask,
                    lab) in enumerate(self.test_loader):
                # Push data to GPU
                prem_id = prem_id.long().to(self.device)
                hyp_id = hyp_id.long().to(self.device)
                prem_att_mask = prem_att_mask.long().to(self.device)
                hypo_att_mask = hypo_att_mask.long().to(self.device)
                lab = lab.long().to(self.device)

                # Prepare data as inputs to bert: one concatenated sequence
                input_ids = torch.cat((prem_id, hyp_id), dim=1)
                attention_mask = torch.cat((prem_att_mask, hypo_att_mask),
                                           dim=1)
                label = lab.unsqueeze(1)

                # Forward pass (the model computes the loss from ``labels``)
                outputs = self.model(input_ids,
                                     attention_mask=attention_mask,
                                     labels=label)

                # Log the test loss
                test_loss += outputs.loss.item()

                # Get predicted labels
                predicted = torch.argmax(outputs.logits, 1)

                # cumulative accuracy = correct predictions / total predictions
                # (vectorized — equivalent to the original per-element loop)
                correct_pred += (predicted == lab).sum().item()
                total_pred += lab.size(0)
                accu = correct_pred / total_pred

                # View deterministic predictions
                if i % print_iter == 0:
                    print(self.current_epoch, i, ": ------ TEST ------")
                    print("------ Actual Labels ------")
                    print(lab)
                    print("------ Predicted Labels ------")
                    print(predicted)
                    print("Current test accuracy: ", accu)

        if self.log_comet and (not self.is_vae):
            # Fix: log the mean loss — the original logged the running sum,
            # which is inconsistent with the value this method returns.
            self.experiment.log_metrics({'Test_Loss': test_loss / (i + 1)})
            self.experiment.log_metrics({'Test_Accu': accu})

        return test_loss / (i + 1), accu

########################## Log ##############################

    def save_model(self):
        root_model_path = os.path.join(self.experiment_dir, 'latest_model.pt')
        model_dict = self.model.state_dict()
        state_dict = {
            'model': model_dict,
            'optimizer': self.optimizer.state_dict()
        }
        torch.save(state_dict, root_model_path)

    def __record_stats(self, train_loss, bleu1_t, bleu4_t, val_loss, bleu1_v,
                       bleu4_v):
        """Append this epoch's metrics to the histories and mirror them to disk.

        Fixes: the original wrote ``self.__training_losses`` (a name-mangled
        attribute that does not exist — the field is ``training_losses``) and
        ``self.bleu1`` / ``self.bleu4`` (the fields are the ``_t``/``_v``
        variants set up in ``__init__``), so every call raised AttributeError.
        """
        self.training_losses.append(train_loss)
        self.bleu1_t.append(bleu1_t)
        self.bleu4_t.append(bleu4_t)
        self.val_losses.append(val_loss)
        self.bleu1_v.append(bleu1_v)
        self.bleu4_v.append(bleu4_v)

        # NOTE(review): no plot_stats method is defined on this class in this
        # file; guard the call so stats recording does not crash if plotting
        # was never attached.
        if hasattr(self, 'plot_stats'):
            self.plot_stats()

        write_to_file_in_dir(self.experiment_dir, 'training_losses.txt',
                             self.training_losses)
        write_to_file_in_dir(self.experiment_dir, 'val_losses.txt',
                             self.val_losses)

        write_to_file_in_dir(self.experiment_dir, 'bleu1.txt', self.bleu1_t)
        write_to_file_in_dir(self.experiment_dir, 'bleu4.txt', self.bleu4_t)
        write_to_file_in_dir(self.experiment_dir, 'bleu1_val.txt',
                             self.bleu1_v)
        write_to_file_in_dir(self.experiment_dir, 'bleu4_val.txt',
                             self.bleu4_v)

    def log(self, log_str, file_name=None):
        """Print a message and append it to all.log (plus an optional extra file)."""
        print(log_str)
        targets = ['all.log'] if file_name is None else ['all.log', file_name]
        for target in targets:
            log_to_file_in_dir(self.experiment_dir, target, log_str)

    def log_epoch_stats(self, start_time):
        time_elapsed = datetime.now() - start_time
        time_to_completion = time_elapsed * (self.epochs - self.current_epoch -
                                             1)
        train_loss = self.training_losses[self.current_epoch]
        val_loss = self.val_losses[self.current_epoch]
        summary_str = "Epoch: {}, Train Loss: {}, Val Loss: {}, Took {}, ETA: {}\n"
        summary_str = summary_str.format(self.current_epoch + 1, train_loss,
                                         val_loss, str(time_elapsed),
                                         str(time_to_completion))
        self.log(summary_str, 'epoch.log')
                                              random_state=42)

    checkpoint_callback = skopt.callbacks.CheckpointSaver(
        f'D:\\FINKI\\8_dps\\Project\\MODELS\\skopt_checkpoints\\{EXPERIMENT_ID}.pkl'
    )
    hyperparameters_optimizer.fit(X_train,
                                  y_train,
                                  callback=[checkpoint_callback])
    skopt.dump(hyperparameters_optimizer, f'saved_models\\{EXPERIMENT_ID}.pkl')

    y_pred = hyperparameters_optimizer.best_estimator_.predict(X_test)

    for i in range(len(hyperparameters_optimizer.cv_results_['params'])):
        exp = Experiment(
            api_key='A8Lg71j9LtIrsv0deBA0DVGcR',
            project_name=ALGORITHM,
            workspace="8_dps",
            auto_output_logging='native',
        )
        exp.set_name(f'{EXPERIMENT_ID}_{i+1}')
        exp.add_tags([
            DS,
            SEGMENTS_LENGTH,
        ])
        for k, v in hyperparameters_optimizer.cv_results_.items():
            if k == "params": exp.log_parameters(dict(v[i]))
            else: exp.log_metric(k, v[i])
        exp.end()

#%%
# Ejemplo n.º 21
# 0
class CorefSolver():
    """Trainer/decoder for a coreference-aware transformer summarizer.

    Builds an encoder/decoder model whose decoder can attend over encoded
    coreference-cluster ("ner") features, supplied by one of several
    interchangeable entity encoders (transformer / albert / gru / lstm).
    Provides a training loop with optional comet.ml logging, greedy-decode
    test inference, and beam-search decoding.
    """
    def __init__(self, args):
        # `args` is an argparse-style namespace; only the attributes used in
        # this class are required (num_layer, dropout, entity_encoder_type, ...).
        self.args = args
        self.data_utils = data_utils(args)
        self.disable_comet = args.disable_comet
        self.model = self.make_model(
            src_vocab=self.data_utils.vocab_size,
            tgt_vocab=self.data_utils.vocab_size,
            N=args.num_layer,
            dropout=args.dropout,
            entity_encoder_type=args.entity_encoder_type)
        print(self.model)
        if self.args.train:
            # Training-only resources: text log file, checkpoint directory,
            # and the file where validation predictions are written for ROUGE.
            self.outfile = open(self.args.logfile, 'w')
            self.model_dir = make_save_dir(args.model_dir)
            # self.logfile = os.path.join(args.logdir, args.exp_name)
            # self.log = SummaryWriter(self.logfile)
            self.w_valid_file = args.w_valid_file

    def make_model(self,
                   src_vocab,
                   tgt_vocab,
                   N=6,
                   dropout=0.1,
                   d_model=512,
                   entity_encoder_type='linear',
                   d_ff=2048,
                   h=8):

        """Construct the full model from hyperparameters and move it to CUDA.

        Depending on args, the entity features are injected either at the
        embedding level (EncoderDecoderOrg) or inside the decoder
        (Decoder / DecoderLayer_ner).

        NOTE(review): there is no branch for entity_encoder_type='linear'
        (the declared default) — in that case `entity_encoder` is unbound and
        the constructor calls below raise NameError. Confirm callers always
        pass one of the four handled types.
        """
        c = copy.deepcopy
        attn = MultiHeadedAttention(h, d_model)
        # Single-head attention used by the ner-aware decoder layers.
        attn_ner = MultiHeadedAttention(1, d_model, dropout)
        ff = PositionwiseFeedForward(d_model, d_ff, dropout)
        position = PositionalEncoding(d_model, dropout)
        embed = Embeddings(d_model, src_vocab)
        # Source and target share this embedding + positional-encoding stack.
        word_embed = nn.Sequential(embed, c(position))
        print('pgen', self.args.pointer_gen)

        if entity_encoder_type == 'transformer':
            # entity_encoder = nn.Sequential(embed, Encoder(EncoderLayer(d_model, c(attn), c(ff), dropout), 1))
            print('transformer')
            entity_encoder = Seq_Entity_Encoder(
                embed,
                Encoder(EncoderLayer(d_model, c(attn), c(ff), dropout), 2))
        elif entity_encoder_type == 'albert':
            albert_tokenizer = AlbertTokenizer.from_pretrained(
                'albert-base-v2')
            albert = AlbertModel.from_pretrained('albert-base-v2')
            entity_encoder = Albert_Encoder(albert, albert_tokenizer, d_model)
        elif entity_encoder_type == 'gru':
            entity_encoder = RNNEncoder(embed,
                                        'GRU',
                                        d_model,
                                        d_model,
                                        num_layers=1,
                                        dropout=0.1,
                                        bidirectional=True)
            print('gru')
        elif entity_encoder_type == 'lstm':
            entity_encoder = RNNEncoder(embed,
                                        'LSTM',
                                        d_model,
                                        d_model,
                                        num_layers=1,
                                        dropout=0.1,
                                        bidirectional=True)
            print('lstm')

        if self.args.ner_at_embedding:
            # Entity features merged with token embeddings ("Org" variants).
            model = EncoderDecoderOrg(
                Encoder(EncoderLayer(d_model, c(attn), c(ff), dropout), N),
                DecoderOrg(
                    DecoderLayerOrg(d_model, c(attn), c(attn), c(ff), dropout),
                    N, d_model, tgt_vocab, self.args.pointer_gen), word_embed,
                word_embed, entity_encoder)
        else:
            if self.args.ner_last:
                # Entity attention applied only after the decoder stack.
                decoder = Decoder(
                    DecoderLayer(d_model, c(attn), c(attn), c(ff),
                                 dropout), N, d_model, tgt_vocab,
                    self.args.pointer_gen, self.args.ner_last)
            else:
                # Entity attention fused inside every decoder layer.
                decoder = Decoder(
                    DecoderLayer_ner(d_model, c(attn), c(attn), attn_ner,
                                     c(ff), dropout, self.args.fusion), N,
                    d_model, tgt_vocab, self.args.pointer_gen,
                    self.args.ner_last)
            model = EncoderDecoder(
                Encoder(EncoderLayer(d_model, c(attn), c(ff), dropout), N),
                decoder, word_embed, word_embed, entity_encoder)

        # This was important from their code.
        # Initialize parameters with Glorot / fan_avg.
        for p in model.parameters():
            if p.dim() > 1:
                nn.init.xavier_uniform_(p)

        # levels = 3
        # num_chans = [d_model] * (args.levels)
        # k_size = 5
        # tcn = TCN(embed, d_model, num_channels, k_size, dropout=dropout)

        return model.cuda()

    def train(self):
        """Run the main training loop.

        Steps from `start_step` to args.total_steps, with an LR warmup
        schedule recomputed every 400 steps, periodic console/comet logging,
        and periodic validation + ROUGE scoring + checkpointing.
        """
        if not self.disable_comet:
            # logging
            hyper_params = {
                "num_layer": self.args.num_layer,
                "pointer_gen": self.args.pointer_gen,
                "ner_last": self.args.ner_last,
                "entity_encoder_type": self.args.entity_encoder_type,
                "fusion": self.args.fusion,
                "dropout": self.args.dropout,
            }
            COMET_PROJECT_NAME = 'summarization'
            COMET_WORKSPACE = 'timchen0618'

            self.exp = Experiment(
                api_key='mVpNOXSjW7eU0tENyeYiWZKsl',
                project_name=COMET_PROJECT_NAME,
                workspace=COMET_WORKSPACE,
                auto_output_logging='simple',
                auto_metric_logging=None,
                display_summary=False,
            )
            self.exp.log_parameters(hyper_params)
            self.exp.add_tags([
                '%s entity_encoder' % self.args.entity_encoder_type,
                self.args.fusion
            ])
            if self.args.ner_last:
                self.exp.add_tag('ner_last')
            if self.args.ner_at_embedding:
                self.exp.add_tag('ner_at_embedding')
            self.exp.set_name(self.args.exp_name)
            self.exp.add_tag('coreference')

        print('ner_last ', self.args.ner_last)
        print('ner_at_embedding', self.args.ner_at_embedding)
        # dataloader & optimizer
        data_yielder = self.data_utils.data_yielder(num_epoch=100)
        optim = torch.optim.Adam(self.model.parameters(),
                                 lr=1e-7,
                                 betas=(0.9, 0.998),
                                 eps=1e-8,
                                 amsgrad=True)  #get_std_opt(self.model)
        # entity_optim = torch.optim.Adam(self.entity_encoder.parameters(), lr=1e-7, betas=(0.9, 0.998), eps=1e-8, amsgrad=True)
        total_loss = []
        start = time.time()
        print('*' * 50)
        print('Start Training...')
        print('*' * 50)
        start_step = 0

        # if loading from checkpoint
        if self.args.load_model:
            # NOTE(review): the checkpoint file is loaded twice (state_dict,
            # then step); loading once into a local would avoid re-reading.
            state_dict = torch.load(self.args.load_model)['state_dict']
            self.model.load_state_dict(state_dict)
            print("Loading model from " + self.args.load_model + "...")
            # encoder_state_dict = torch.load(self.args.entity_encoder)['state_dict']
            # self.entity_encoder.load_state_dict(encoder_state_dict)
            # print("Loading entity_encoder from %s" + self.args.entity_encoder + "...")
            start_step = int(torch.load(self.args.load_model)['step'])
            print('Resume training from step %d ...' % start_step)

        warmup_steps = 10000
        d_model = 512
        lr = 1e-7
        for step in range(start_step, self.args.total_steps):
            self.model.train()
            batch = data_yielder.__next__()
            optim.zero_grad()
            # entity_optim.zero_grad()

            #update lr
            # Noam-like warmup/decay schedule, refreshed every 400 steps.
            if step % 400 == 1:
                lr = (1 / (d_model**0.5)) * min(
                    (1 / (step / 4)**0.5), step * (1 / (warmup_steps**1.5)))
                for param_group in optim.param_groups:
                    param_group['lr'] = lr
                # for param_group in entity_optim.param_groups:
                #     param_group['lr'] = lr

            batch['src'] = batch['src'].long()
            batch['tgt'] = batch['tgt'].long()
            batch['ner'] = batch['ner'].long()
            batch['src_extended'] = batch['src_extended'].long()

            # forward the model
            if self.args.entity_encoder_type == 'albert':
                # Albert tokenizes the raw cluster text itself; the other
                # encoder types consume the pre-tokenized batch['ner'] ids.
                d = self.model.entity_encoder.tokenizer.batch_encode_plus(
                    batch['ner_text'],
                    return_attention_masks=True,
                    max_length=10,
                    add_special_tokens=False,
                    pad_to_max_length=True,
                    return_tensors='pt')
                ner_mask = d['attention_mask'].cuda().unsqueeze(1)
                ner = d['input_ids'].cuda()
                # print('ner', ner.size())
                # print('ner_mask', ner_mask.size())
                # print('src_mask', batch['src_mask'].size())

            if self.args.entity_encoder_type == 'gru' or self.args.entity_encoder_type == 'lstm':
                ner_feat = self.model.entity_encoder(
                    batch['ner'].transpose(0, 1), batch['cluster_len'])[1]
            elif self.args.entity_encoder_type == 'transformer':
                mask = gen_mask(batch['cluster_len'])
                ner_feat = self.model.entity_encoder(batch['ner'], mask)
            # Pad per-cluster features into a fixed-size per-batch tensor.
            # NOTE(review): for the 'albert' path this overwrites the ner/
            # ner_mask computed above, and `ner_feat` would be unbound for
            # that path — confirm albert runs don't reach this call.
            ner, ner_mask = self.data_utils.pad_ner_feature(
                ner_feat.squeeze(), batch['num_clusters'],
                batch['src'].size(0))
            # print('ner', ner.size())
            # print('ner_mask', ner_mask.size())

            if self.args.ner_at_embedding:
                out = self.model.forward(batch['src'], batch['tgt'], ner,
                                         batch['src_mask'], batch['tgt_mask'],
                                         batch['src_extended'],
                                         len(batch['oov_list']))
            else:
                out = self.model.forward(batch['src'], batch['tgt'], ner,
                                         batch['src_mask'], batch['tgt_mask'],
                                         batch['src_extended'],
                                         len(batch['oov_list']), ner_mask)
            # print out info
            # First sample of the batch, for human inspection in the logs.
            pred = out.topk(1, dim=-1)[1].squeeze().detach().cpu().numpy()[0]
            gg = batch['src_extended'].long().detach().cpu().numpy()[0][:100]
            tt = batch['tgt'].long().detach().cpu().numpy()[0]
            yy = batch['y'].long().detach().cpu().numpy()[0]

            #compute loss & update
            loss = self.model.loss_compute(out, batch['y'].long())
            loss.backward()
            optim.step()
            # entity_optim.step()

            total_loss.append(loss.detach().cpu().numpy())

            # logging information
            if step % self.args.print_every_steps == 1:
                elapsed = time.time() - start
                print("Epoch Step: %d Loss: %f Time: %f lr: %6.6f" %
                      (step, np.mean(total_loss), elapsed,
                       optim.param_groups[0]['lr']))
                self.outfile.write("Epoch Step: %d Loss: %f Time: %f\n" %
                                   (step, np.mean(total_loss), elapsed))
                print(
                    'src:\n',
                    self.data_utils.id2sent(gg, False, False,
                                            batch['oov_list']))
                print(
                    'tgt:\n',
                    self.data_utils.id2sent(yy, False, False,
                                            batch['oov_list']))
                print(
                    'pred:\n',
                    self.data_utils.id2sent(pred, False, False,
                                            batch['oov_list']))
                print('oov_list:\n', batch['oov_list'])

                # NOTE(review): prefer `ner_mask is not None` — `!=` on a
                # tensor relies on object-inequality fallback semantics.
                if ner_mask != None and not self.args.ner_at_embedding:
                    pp = self.model.greedy_decode(
                        batch['src_extended'].long()[:1], ner[:1],
                        batch['src_mask'][:1], 100, self.data_utils.bos,
                        len(batch['oov_list']), self.data_utils.vocab_size,
                        True, ner_mask[:1])
                else:
                    pp = self.model.greedy_decode(
                        batch['src_extended'].long()[:1], ner[:1],
                        batch['src_mask'][:1], 100, self.data_utils.bos,
                        len(batch['oov_list']), self.data_utils.vocab_size,
                        True)

                pp = pp.detach().cpu().numpy()
                print(
                    'pred_greedy:\n',
                    self.data_utils.id2sent(pp[0], False, False,
                                            batch['oov_list']))

                print()
                start = time.time()
                if not self.disable_comet:
                    # self.log.add_scalar('Loss/train', np.mean(total_loss), step)
                    self.exp.log_metric('Train Loss',
                                        np.mean(total_loss),
                                        step=step)
                    self.exp.log_metric('Learning Rate',
                                        optim.param_groups[0]['lr'],
                                        step=step)

                    self.exp.log_text('Src: ' + self.data_utils.id2sent(
                        gg, False, False, batch['oov_list']))
                    self.exp.log_text('Tgt:' + self.data_utils.id2sent(
                        yy, False, False, batch['oov_list']))
                    self.exp.log_text('Pred:' + self.data_utils.id2sent(
                        pred, False, False, batch['oov_list']))
                    self.exp.log_text('Pred Greedy:' + self.data_utils.id2sent(
                        pp[0], False, False, batch['oov_list']))
                    self.exp.log_text('OOV:' + ' '.join(batch['oov_list']))

                # Reset the running-loss window after each report.
                total_loss = []

            ##########################
            # validation
            ##########################
            if step % self.args.valid_every_steps == 2:
                print('*' * 50)
                print('Start Validation...')
                print('*' * 50)
                self.model.eval()
                val_yielder = self.data_utils.data_yielder(1, valid=True)
                # Reuses `total_loss` for validation losses; it is reset here
                # and again implicitly at the next training report.
                total_loss = []
                fw = open(self.w_valid_file, 'w')
                for batch in val_yielder:
                    with torch.no_grad():
                        batch['src'] = batch['src'].long()
                        batch['tgt'] = batch['tgt'].long()
                        batch['ner'] = batch['ner'].long()
                        batch['src_extended'] = batch['src_extended'].long()

                        ### ner ######
                        if self.args.entity_encoder_type == 'albert':
                            d = self.model.entity_encoder.tokenizer.batch_encode_plus(
                                batch['ner_text'],
                                return_attention_masks=True,
                                max_length=10,
                                add_special_tokens=False,
                                pad_to_max_length=True,
                                return_tensors='pt')
                            ner_mask = d['attention_mask'].cuda().unsqueeze(1)
                            ner = d['input_ids'].cuda()

                        if self.args.entity_encoder_type == 'gru' or self.args.entity_encoder_type == 'lstm':
                            ner_feat = self.model.entity_encoder(
                                batch['ner'].transpose(0, 1),
                                batch['cluster_len'])[1]
                        elif self.args.entity_encoder_type == 'transformer':
                            mask = gen_mask(batch['cluster_len'])
                            ner_feat = self.model.entity_encoder(
                                batch['ner'], mask)
                        ner, ner_mask = self.data_utils.pad_ner_feature(
                            ner_feat.squeeze(), batch['num_clusters'],
                            batch['src'].size(0))
                        ### ner ######

                        if self.args.ner_at_embedding:
                            out = self.model.forward(batch['src'],
                                                     batch['tgt'], ner,
                                                     batch['src_mask'],
                                                     batch['tgt_mask'],
                                                     batch['src_extended'],
                                                     len(batch['oov_list']))
                        else:
                            out = self.model.forward(batch['src'],
                                                     batch['tgt'], ner,
                                                     batch['src_mask'],
                                                     batch['tgt_mask'],
                                                     batch['src_extended'],
                                                     len(batch['oov_list']),
                                                     ner_mask)
                        loss = self.model.loss_compute(out, batch['y'].long())
                        total_loss.append(loss.item())

                        if self.args.ner_at_embedding:
                            pred = self.model.greedy_decode(
                                batch['src_extended'].long(), ner,
                                batch['src_mask'], self.args.max_len,
                                self.data_utils.bos, len(batch['oov_list']),
                                self.data_utils.vocab_size)
                        else:
                            pred = self.model.greedy_decode(
                                batch['src_extended'].long(),
                                ner,
                                batch['src_mask'],
                                self.args.max_len,
                                self.data_utils.bos,
                                len(batch['oov_list']),
                                self.data_utils.vocab_size,
                                ner_mask=ner_mask)

                        # Write decoded sentences (bos stripped) for ROUGE.
                        for l in pred:
                            sentence = self.data_utils.id2sent(
                                l[1:], True, self.args.beam_size != 1,
                                batch['oov_list'])
                            fw.write(sentence)
                            fw.write("\n")
                fw.close()
                # files_rouge = FilesRouge()
                # scores = files_rouge.get_scores(self.w_valid_file, self.args.valid_tgt_file, avg=True)
                scores = cal_rouge_score(self.w_valid_file,
                                         self.args.valid_ref_file)
                r1_score = scores['rouge1']
                r2_score = scores['rouge2']

                print('=============================================')
                print('Validation Result -> Loss : %6.6f' %
                      (sum(total_loss) / len(total_loss)))
                print(scores)
                print('=============================================')
                self.outfile.write(
                    '=============================================\n')
                self.outfile.write('Validation Result -> Loss : %6.6f\n' %
                                   (sum(total_loss) / len(total_loss)))
                self.outfile.write(
                    '=============================================\n')
                # self.model.train()
                # self.log.add_scalar('Loss/valid', sum(total_loss)/len(total_loss), step)
                # self.log.add_scalar('Score/valid', r1_score, step)
                if not self.disable_comet:
                    self.exp.log_metric('Valid Loss',
                                        sum(total_loss) / len(total_loss),
                                        step=step)
                    self.exp.log_metric('R1 Score', r1_score, step=step)
                    self.exp.log_metric('R2 Score', r2_score, step=step)

                #Saving Checkpoint
                # Checkpoint name encodes step (in 10k units), loss and scores.
                w_step = int(step / 10000)
                print('Saving ' + str(w_step) + 'w_model.pth!\n')
                self.outfile.write('Saving ' + str(w_step) + 'w_model.pth\n')

                model_name = str(w_step) + 'w_' + '%6.6f' % (
                    sum(total_loss) / len(total_loss)
                ) + '%2.3f_' % r1_score + '%2.3f_' % r2_score + 'model.pth'
                state = {'step': step, 'state_dict': self.model.state_dict()}
                torch.save(state, os.path.join(self.model_dir, model_name))

                # entity_encoder_name = str(w_step) + '0w_' + '%6.6f'%(sum(total_loss)/len(total_loss)) + '%2.3f_'%r1_score + 'entity_encoder.pth'
                # state = {'step': step, 'state_dict': self.entity_encoder.state_dict()}
                # torch.save(state, os.path.join(self.model_dir, entity_encoder_name))

    def test(self):
        """Decode the test set with a loaded checkpoint and write predictions.

        Uses greedy decoding when beam_size == 1, beam search otherwise.
        Output goes to args.pred_dir/args.filename, one sentence per line.
        """
        #prepare model
        path = self.args.load_model
        # entity_encoder_path = self.args.entity_encoder
        state_dict = torch.load(path)['state_dict']
        max_len = self.args.max_len
        model = self.model
        model.load_state_dict(state_dict)

        # entity_encoder_dict = torch.load(entity_encoder_path)['state_dict']
        # self.entity_encoder.load_state_dict(entity_encoder_dict)

        pred_dir = make_save_dir(self.args.pred_dir)
        filename = self.args.filename

        #start decoding
        data_yielder = self.data_utils.data_yielder(num_epoch=1)
        total_loss = []
        start = time.time()

        #file
        f = open(os.path.join(pred_dir, filename), 'w')

        self.model.eval()

        # decode_strategy = BeamSearch(
        #             self.beam_size,
        #             batch_size=batch.batch_size,
        #             pad=self._tgt_pad_idx,
        #             bos=self._tgt_bos_idx,
        #             eos=self._tgt_eos_idx,
        #             n_best=self.n_best,
        #             global_scorer=self.global_scorer,
        #             min_length=self.min_length, max_length=self.max_length,
        #             return_attention=attn_debug or self.replace_unk,
        #             block_ngram_repeat=self.block_ngram_repeat,
        #             exclusion_tokens=self._exclusion_idxs,
        #             stepwise_penalty=self.stepwise_penalty,
        #             ratio=self.ratio)

        step = 0
        for batch in data_yielder:
            #print(batch['src'].data.size())
            step += 1
            if step % 100 == 0:
                print('%d batch processed. Time elapsed: %f min.' %
                      (step, (time.time() - start) / 60.0))
                start = time.time()

            ### ner ###
            if self.args.entity_encoder_type == 'albert':
                d = self.model.entity_encoder.tokenizer.batch_encode_plus(
                    batch['ner_text'],
                    return_attention_masks=True,
                    max_length=10,
                    add_special_tokens=False,
                    pad_to_max_length=True,
                    return_tensors='pt')
                ner_mask = d['attention_mask'].cuda().unsqueeze(1)
                ner = d['input_ids'].cuda()
            else:
                ner_mask = None
                ner = batch['ner'].long()

            with torch.no_grad():
                if self.args.beam_size == 1:
                    if self.args.ner_at_embedding:
                        out = self.model.greedy_decode(
                            batch['src_extended'].long(),
                            self.model.entity_encoder(ner),
                            batch['src_mask'], max_len, self.data_utils.bos,
                            len(batch['oov_list']), self.data_utils.vocab_size)
                    else:
                        out = self.model.greedy_decode(
                            batch['src_extended'].long(),
                            self.model.entity_encoder(ner),
                            batch['src_mask'],
                            max_len,
                            self.data_utils.bos,
                            len(batch['oov_list']),
                            self.data_utils.vocab_size,
                            ner_mask=ner_mask)
                else:
                    ret = self.beam_decode(batch, max_len,
                                           len(batch['oov_list']))
                    out = ret['predictions']
            for l in out:
                # Strip the leading bos token before detokenizing.
                sentence = self.data_utils.id2sent(l[1:], True,
                                                   self.args.beam_size != 1,
                                                   batch['oov_list'])
                #print(l[1:])
                f.write(sentence)
                f.write("\n")

    def beam_decode(self, batch, max_len, oov_nums):
        """Beam-search decode a single-example batch.

        Returns the dict produced by `_from_beam` with 'predictions' and
        'scores'. Only batch_size == 1 is supported (asserted below).
        """

        src = batch['src'].long()
        src_mask = batch['src_mask']
        src_extended = batch['src_extended'].long()

        bos_token = self.data_utils.bos
        beam_size = self.args.beam_size
        vocab_size = self.data_utils.vocab_size
        batch_size = src.size(0)

        # Helpers that tile tensors across the beam dimension.
        def rvar(a):
            return a.repeat(beam_size, 1, 1)

        def rvar2(a):
            return a.repeat(beam_size, 1)

        def bottle(m):
            return m.view(batch_size * beam_size, -1)

        def unbottle(m):
            return m.view(beam_size, batch_size, -1)

        ### ner ###
        if self.args.entity_encoder_type == 'albert':
            d = self.model.entity_encoder.tokenizer.batch_encode_plus(
                batch['ner_text'],
                return_attention_masks=True,
                max_length=10,
                add_special_tokens=False,
                pad_to_max_length=True,
                return_tensors='pt')
            ner_mask = d['attention_mask'].cuda().unsqueeze(1)
            ner = d['input_ids'].cuda()
        else:
            ner_mask = None
            ner = batch['ner'].long()
        ner = self.model.entity_encoder(ner)

        if self.args.ner_at_embedding:
            memory = self.model.encode(src, src_mask, ner)
        else:
            memory = self.model.encode(src, src_mask)

        assert batch_size == 1

        beam = [
            Beam(beam_size,
                 self.data_utils.pad,
                 bos_token,
                 self.data_utils.eos,
                 min_length=self.args.min_length) for i in range(batch_size)
        ]
        # Tile encoder outputs/masks so each beam hypothesis sees them.
        memory = rvar(memory)
        ner = rvar(ner)
        src_mask = rvar(src_mask)
        src_extended = rvar2(src_extended)

        for i in range(self.args.max_len):
            if all((b.done() for b in beam)):
                break
            # Construct batch x beam_size nxt words.
            # Get all the pending current beam words and arrange for forward.
            inp = torch.stack([b.get_current_state()
                               for b in beam]).t().contiguous().view(-1, 1)
            #inp -> [1, 3]
            # Map extended-vocab (OOV) ids back to 0 before feeding the model.
            inp_mask = inp < self.data_utils.vocab_size
            inp = inp * inp_mask.long()

            decoder_input = inp

            if self.args.ner_at_embedding:
                final_dist = self.model.decode(memory, ner, src_mask,
                                               decoder_input, None,
                                               src_extended, oov_nums)
            else:
                final_dist = self.model.decode(memory,
                                               ner,
                                               src_mask,
                                               decoder_input,
                                               None,
                                               src_extended,
                                               oov_nums,
                                               ner_mask=ner_mask)
            # final_dist, decoder_hidden, attn_dist_p, p_gen = self.seq2seq_model.model_copy.decoder(
            #                 decoder_input, decoder_hidden,
            #                 post_encoder_outputs, post_enc_padding_mask,
            #                 extra_zeros, post_enc_batch_extend_vocab
            #                 )
            # # Run one step.

            # print('inp', inp.size())

            # decoder_outputs: beam x rnn_size

            # (b) Compute a vector of batch*beam word scores.
            out = unbottle(final_dist)
            out[:, :, 2] = 0  #no unk
            # out.size -> [3, 1, vocab]

            # (c) Advance each beam.
            for j, b in enumerate(beam):
                b.advance(out[:, j])
                # decoder_hidden = self.beam_update(j, b.get_current_origin(), beam_size, decoder_hidden)

        # (4) Extract sentences from beam.
        ret = self._from_beam(beam)

        return ret

    def _from_beam(self, beam):
        """Collect the n-best finished hypotheses and scores from each beam."""
        ret = {"predictions": [], "scores": []}
        for b in beam:

            n_best = self.args.n_best
            scores, ks = b.sort_finished(minimum=n_best)
            hyps = []
            for i, (times, k) in enumerate(ks[:n_best]):
                hyp = b.get_hyp(times, k)
                hyps.append(hyp)

            ret["predictions"].append(hyps)
            ret["scores"].append(scores)

        return ret
Ejemplo n.º 22
0
class Logger:
    """Accumulates per-round/per-episode network stats and optionally mirrors
    them to a comet.ml experiment.

    When ``send_logs`` is False, no experiment is created and every comet
    call is skipped, so the logger runs without credentials.
    """

    def __init__(self, send_logs, tags, parameters, experiment=None):
        """
        Args:
            send_logs: if True, create/use a comet.ml Experiment.
            tags: optional list of tags for the experiment.
            parameters: optional dict of hyperparameters to log.
            experiment: an existing Experiment to reuse; if None and
                send_logs is True, credentials are read from the first
                comet_token.json found under the working directory.
        """
        self.stations = 5
        self.send_logs = send_logs
        if self.send_logs:
            if experiment is None:
                json_loc = glob.glob("./**/comet_token.json")[0]
                with open(json_loc, "r") as f:
                    kwargs = json.load(f)

                self.experiment = Experiment(**kwargs)
            else:
                self.experiment = experiment
        self.sent_mb = 0
        # Sliding window over the last 100 rounds' megabytes, used to
        # compute a smoothed current throughput.
        self.speed_window = deque(maxlen=100)
        self.step_time = None
        self.current_speed = 0
        if self.send_logs:
            if tags is not None:
                self.experiment.add_tags(tags)
            if parameters is not None:
                self.experiment.log_parameters(parameters)

    def begin_logging(self, episode_count, steps_per_ep, sigma, theta, step_time):
        """Record run-level parameters before training begins."""
        self.step_time = step_time
        if self.send_logs:
            self.experiment.log_parameter("Episode count", episode_count)
            self.experiment.log_parameter("Steps per episode", steps_per_ep)
            self.experiment.log_parameter("theta", theta)
            self.experiment.log_parameter("sigma", sigma)

    def log_round(self, states, reward, cumulative_reward, info, loss, observations, step):
        """Aggregate one round's statistics and (optionally) send them to comet.

        ``info`` is a list of "mb|cw|stations|fairness" strings (one per
        agent); fields are averaged across agents.
        """
        if self.send_logs:
            # BUG FIX: this call used to run unconditionally and raised
            # AttributeError when send_logs was False (self.experiment is
            # only assigned in __init__ when send_logs is True).
            self.experiment.log_histogram_3d(states, name="Observations", step=step)
        info = [[j for j in i.split("|")] for i in info]
        info = np.mean(np.array(info, dtype=np.float32), axis=0)
        try:
            round_mb = info[0]
        except Exception as e:
            # Surface the malformed payload before re-raising for debugging.
            print(info)
            print(reward)
            raise e
        self.speed_window.append(round_mb)
        self.current_speed = np.mean(np.asarray(self.speed_window)/self.step_time)
        self.sent_mb += round_mb
        CW = info[1]
        self.stations = info[2]
        fairness = info[3]

        if self.send_logs:
            self.experiment.log_metric("Round reward", np.mean(reward), step=step)
            self.experiment.log_metric("Per-ep reward", np.mean(cumulative_reward), step=step)
            self.experiment.log_metric("Megabytes sent", self.sent_mb, step=step)
            self.experiment.log_metric("Round megabytes sent", round_mb, step=step)
            self.experiment.log_metric("Chosen CW", CW, step=step)
            self.experiment.log_metric("Station count", self.stations, step=step)
            self.experiment.log_metric("Current throughput", self.current_speed, step=step)
            self.experiment.log_metric("Fairness index", fairness, step=step)

            for i, obs in enumerate(observations):
                self.experiment.log_metric(f"Observation {i}", obs, step=step)

            self.experiment.log_metrics(loss, step=step)

    def log_episode(self, cumulative_reward, speed, step):
        """Log end-of-episode totals and reset the per-episode accumulators."""
        if self.send_logs:
            self.experiment.log_metric("Cumulative reward", cumulative_reward, step=step)
            self.experiment.log_metric("Speed", speed, step=step)

        self.sent_mb = 0
        self.last_speed = speed
        self.speed_window = deque(maxlen=100)
        self.current_speed = 0

    def end(self):
        """Finish the comet.ml experiment, if one was started."""
        if self.send_logs:
            self.experiment.end()
Ejemplo n.º 23
0
class CometMLMonitor(MonitorBase):
    """
    Send scalar data and the graph to https://www.comet.ml.

    Note:
        1. comet_ml requires you to `import comet_ml` before importing tensorflow or tensorpack.
        2. The "automatic output logging" feature of comet_ml will make the training progress bar appear to freeze.
           Therefore the feature is disabled by default.
    """
    def __init__(self, experiment=None, tags=None, **kwargs):
        """
        Args:
            experiment (comet_ml.Experiment): if provided, invalidate all other arguments
            tags (list[str]): experiment tags
            kwargs: arguments used to initialize :class:`comet_ml.Experiment`,
                such as project name, API key, etc.
                Refer to its documentation for details.
        """
        if experiment is None:
            from comet_ml import Experiment
            # though it's not functioning, git patch logging requires it
            kwargs.setdefault('log_code', True)
            # disabled by default: it makes the progress bar appear frozen
            kwargs.setdefault('auto_output_logging', None)
            self._exp = Experiment(**kwargs)
            if tags is not None:
                self._exp.add_tags(tags)
        else:
            # a ready-made Experiment supersedes every other argument
            assert tags is None and len(kwargs) == 0
            self._exp = experiment

        self._exp.set_code("Code logging is impossible ...")
        self._exp.log_dependency('tensorpack', __git_version__)

    @property
    def experiment(self):
        """
        The :class:`comet_ml.Experiment` instance.
        """
        return self._exp

    def _before_train(self):
        # upload the TF graph once, before training starts
        self._exp.set_model_graph(tf.get_default_graph())

    @HIDE_DOC
    def process_scalar(self, name, val):
        self._exp.log_metric(name, val, step=self.global_step)

    @HIDE_DOC
    def process_image(self, name, val):
        self._exp.set_step(self.global_step)
        for i, img in enumerate(val):
            # suffix with the image index only when there are several images
            suffix = "_" + str(i) if len(val) > 1 else ""
            log_name = "{}_step{}{}".format(name, self.global_step, suffix)
            self._exp.log_image(img,
                                image_format="jpeg",
                                name=log_name,
                                image_minmax=(0, 255))

    def _after_train(self):
        self._exp.end()

    def _after_epoch(self):
        self._exp.log_epoch_end(self.epoch_num)
Ejemplo n.º 24
0
def main(
    logger,
    optimized_function,
    optimizer_config_file,
    model_config_file,
    project_name,
    work_space,
    tags,
    init_psi,
    n_samples_per_dim,
):
    """Optimize `optimized_function` with VoidOptimizer, logging to comet.ml.

    Args:
        logger: class name of the logger (resolved via ``str_to_class``);
            instantiated with the comet experiment.
        optimized_function: class name of the objective (resolved via
            ``str_to_class``).
        optimizer_config_file: module name exposing an ``optimizer_config`` dict.
        model_config_file: module name exposing a ``model_config`` dict.
        project_name: comet.ml project name.
        work_space: comet.ml workspace.
        tags: comma-separated experiment tags.
        init_psi: comma-separated floats for the initial parameter vector.
        n_samples_per_dim: number of repetitions used by the surrogate model.
    """
    model_config = getattr(__import__(model_config_file), 'model_config')
    model_config["num_repetitions"] = n_samples_per_dim
    optimizer_config = getattr(__import__(optimizer_config_file),
                               'optimizer_config')
    init_psi = torch.tensor([float(x.strip())
                             for x in init_psi.split(',')]).float().to(device)

    optimized_function_cls = str_to_class(optimized_function)

    experiment = Experiment(project_name=project_name, workspace=work_space)
    experiment.add_tags([x.strip() for x in tags.split(',')])
    # Flatten the nested config dicts into prefixed scalar parameters.
    experiment.log_parameters({
        "optimizer_{}".format(key): value
        for key, value in optimizer_config.items()
    })
    experiment.log_parameters({
        "optimizer_{}".format(key): value
        for key, value in optimizer_config.get('line_search_options',
                                               {}).items()
    })
    experiment.log_parameters(
        {"model_{}".format(key): value
         for key, value in model_config.items()})

    logger = str_to_class(logger)(experiment)
    y_model = optimized_function_cls(device=device, psi_init=init_psi)
    model = VoidModel(y_model=y_model, psi=init_psi, **model_config)

    optimizer = VoidOptimizer(oracle=model,
                              x=init_psi,
                              logger=logger,
                              n_samples=model_config["K"],
                              **optimizer_config)

    current_psi, status, history = optimizer.optimize()

    try:
        logger.log_optimizer(optimizer)
        # BUG FIX: the original passed the undefined name `x_k` here
        # (NameError at runtime); the optimized parameters are `current_psi`.
        logger.log_grads(model,
                         y_sampler=y_model,
                         current_psi=current_psi,
                         num_repetitions=30000,
                         n_samples=100,
                         log_grad_diff=True)
        logger.log_performance(y_sampler=y_model,
                               current_psi=current_psi,
                               n_samples=5000)

    except Exception as e:
        print(e)
        raise
Ejemplo n.º 25
0
def comet_lgbm(save_path):
    """Train a LightGBM classifier on a pickled activity table and log the
    run (parameters, per-class metrics, ROC figures, model path) to comet.ml.

    Args:
        save_path: path to a pickle file, expected to be named like
            ``AID_xxx_endinfo.pkl``, containing the activity table.
    """
    from comet_ml import Experiment
    # SECURITY NOTE(review): the API key is hard-coded in source; it should
    # be read from an environment variable or config file instead.
    exp = Experiment(api_key="sqMrI9jc8kzJYobRXRuptF5Tj",
                            project_name="baseline", workspace="gdreiman1")
    # NOTE(review): this rebinds Experiment.log_code (normally a constructor
    # flag) to a bool — confirm this actually enables code logging.
    exp.log_code = True

    import pickle
    import pandas as pd
    import lightgbm as lgb
    import numpy as np
    import sklearn
    import matplotlib.pyplot as plt
    from sklearn.metrics import precision_recall_fscore_support as prf
    #%%
    def single_roc(y_preds, y_true):
        """Plot ROC and precision/recall curves for a binary classifier."""
        from sklearn.metrics import roc_curve, auc, precision_recall_curve
        fpr, tpr, _ = roc_curve(y_true, y_preds)
        roc_auc = auc(fpr, tpr)
        plt.figure()
        lw = 2
        plt.plot(fpr, tpr, color='darkorange',
                 lw=lw, label='ROC curve (area = %0.2f)' % roc_auc)
        plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title('Receiver operating characteristic example')

        precision, recall, thresholds = precision_recall_curve(y_true, y_preds)
        plt.plot(recall, precision, color='blue',
                 lw=lw, label='Precision vs Recall')
        # show the plot
        plt.legend(loc="lower right")
        plt.show()

    def multi_roc(y_preds, y_true, name, n_classes):
        """Plot per-class, micro- and macro-averaged ROC curves.

        Args:
            y_preds: (n_samples, n_classes) predicted probabilities.
            y_true: (n_samples, n_classes) one-hot encoded true labels.
            name: title suffix for the plot.
            n_classes: number of classes.
        """
        from sklearn.metrics import roc_curve, auc
        from itertools import cycle
        lw = 2
        name_store = ['Active', 'Inactive', 'Inconclusive']
        fpr = dict()
        tpr = dict()
        roc_auc = dict()
        for i in range(n_classes):
            fpr[i], tpr[i], _ = roc_curve(y_true[:, i], y_preds[:, i])
            roc_auc[i] = auc(fpr[i], tpr[i])

        # Compute micro-average ROC curve and ROC area.
        # BUG FIX: micro-averaging must pool ALL classes; the original used
        # y_true[:, i] / y_preds[:, i], i.e. only the last class.
        fpr["micro"], tpr["micro"], _ = roc_curve(y_true.ravel(), y_preds.ravel())
        roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])
        # Compute macro-average ROC curve and ROC area

        # First aggregate all false positive rates
        all_fpr = np.unique(np.concatenate([fpr[i] for i in range(n_classes)]))

        # Then interpolate all ROC curves at this points
        # (np.interp replaces the deprecated/removed scipy.interp alias)
        mean_tpr = np.zeros_like(all_fpr)
        for i in range(n_classes):
            mean_tpr += np.interp(all_fpr, fpr[i], tpr[i])

        # Finally average it and compute AUC
        mean_tpr /= n_classes

        fpr["macro"] = all_fpr
        tpr["macro"] = mean_tpr
        roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])

        # Plot all ROC curves
        plt.figure()
        plt.plot(fpr["micro"], tpr["micro"],
                 label='micro-average ROC curve (area = {0:0.2f})'
                       ''.format(roc_auc["micro"]),
                 color='deeppink', linestyle=':', linewidth=4)

        plt.plot(fpr["macro"], tpr["macro"],
                 label='macro-average ROC curve (area = {0:0.2f})'
                       ''.format(roc_auc["macro"]),
                 color='navy', linestyle=':', linewidth=4)

        colors = cycle(['aqua', 'darkorange', 'cornflowerblue','green'])
        for i, color in zip(range(n_classes), colors):
            plt.plot(fpr[i], tpr[i], color=color, lw=lw,
                     label='ROC curve of '+ name_store[i]+'(area = {1:0.2f})'
                     ''.format(i, roc_auc[i]))

        plt.plot([0, 1], [0, 1], 'k--', lw=lw)
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title('Multi-class ROC for '+name)

        plt.legend(loc="lower right")
    #%%
    model_type = 'lgbm'
    # Load the cleaned activity table; the context manager guarantees the
    # file handle is closed even if unpickling fails.
    with open(save_path, 'rb') as pickle_off:
        activity_table = pickle.load(pickle_off)
    # get length of the fingerprint vector (assumed constant across rows)
    fp_length = len(activity_table.iloc[5]['MFP'])


    from sklearn.preprocessing import StandardScaler, LabelEncoder
    scaler = StandardScaler(copy = False)
    le = LabelEncoder()
    labels = le.fit_transform(activity_table['PUBCHEM_ACTIVITY_OUTCOME'])
    # stratified 50/50 train/test split
    from sklearn.model_selection import StratifiedShuffleSplit
    splitter = StratifiedShuffleSplit(n_splits=1, test_size=0.5, train_size=None, random_state=2562)
    X_mfp = np.concatenate(np.array(activity_table['MFP'])).ravel()
    X_mfp = X_mfp.reshape((-1,fp_length))
    for train_ind, test_ind in splitter.split(X_mfp,labels):
        # standardize the molecular-descriptor columns (fit on train only)
        X_train_molchars_std = scaler.fit_transform(np.array(activity_table.iloc[train_ind,4:]))
        X_test_molchars_std = scaler.transform(np.array(activity_table.iloc[test_ind,4:]))
        X_train = np.concatenate((X_mfp[train_ind,:],X_train_molchars_std),axis = 1)
        X_test = np.concatenate((X_mfp[test_ind,:],X_test_molchars_std),axis = 1)
        y_train = labels[train_ind]
        y_test = labels[test_ind]
        # collapse class 2 into class 1 for a binary view of the labels
        bin_y_train, bin_y_test = [1 if x ==2 else x for x in y_train],[1 if x ==2 else x for x in y_test]

    # LightGBM training
    train_data = lgb.Dataset(X_train,label=y_train)
    test_data = lgb.Dataset(X_test,label=y_test)
    # build the model
    lgbm_model = lgb.LGBMClassifier(boosting_type='gbdt', num_leaves=31, max_depth=-1, learning_rate=0.1, n_estimators=500, subsample_for_bin=200000, 
                                    objective='binary', is_unbalance=True, min_split_gain=0.0, min_child_weight=0.001, min_child_samples=20, subsample=1.0, 
                                    subsample_freq=0, colsample_bytree=1.0, reg_alpha=0.0, reg_lambda=0.0, random_state=None, n_jobs=-1, silent=True, 
                                    importance_type='split')
    # train model
    trained_mod = lgbm_model.fit(X_train,y_train)
    # predict classes and class probabilities
    test_class_preds = lgbm_model.predict(X_test)
    test_prob_preds = lgbm_model.predict_proba(X_test)
    # classification report (precision/recall/F1 per class)
    class_rep = sklearn.metrics.classification_report(y_test,test_class_preds)

    print(class_rep)
    if len(set(y_test)) == 2:
        single_roc(test_prob_preds[:,1],y_test)
        prec,rec,f_1,supp = prf(y_test, test_class_preds, average=None)
    else:
        from tensorflow.keras.utils import to_categorical
        multi_roc(test_prob_preds,to_categorical(y_test),'',3)
        prec,rec,f_1,supp = prf(y_test, test_class_preds, average=None)


     #%% 
    '''Comet Saving Zone'''
    # get AID number from the file name
    import ntpath
    folder,base = ntpath.split(save_path)
    # split at the LAST underscore: assumes files saved as AID_xxx_endinfo.pkl
    AID, _,end_info = base.rpartition('_')
    # save data location, AID info, and version info
    exp.log_dataset_info(name = AID, version = end_info, path = save_path)
    # save model params
    exp.log_parameters(trained_mod.get_params())
    # save metrics report to comet
    if len(f_1) == 2:
        for i,name in enumerate(['Active','Inactive']):
            exp.log_metric('f1 class '+name, f_1[i])
            exp.log_metric('Recall class'+name,rec[i])
            exp.log_metric('Precision class'+name, prec[i])
    else:
        for i,name in enumerate(['Active','Inconclusive','Inactive']):
            exp.log_metric('f1 class '+str(i), f_1[i])
            exp.log_metric('Recall class'+str(i),rec[i])
            exp.log_metric('Precision class'+str(i), prec[i])
    exp.log_other('Classification Report',class_rep)
    # save model next to the data with the comet experiment key in the name
    exp_num = exp.get_key()
    model_save = ntpath.join(folder, model_type + '_' + exp_num + '.pkl')
    with open(model_save, 'wb') as pickle_on:
        pickle.dump(trained_mod, pickle_on)
    # log trained model location
    exp.log_other('Trained Model Path',model_save)
    # save some informative tags:
    tags = [AID,end_info,model_type]
    exp.add_tags(tags)
    # save ROC curve
    exp.log_figure(figure_name = 'ROC-Pres/Recall',figure=plt)
    plt.show()

    # tell comet that the experiment is over
    exp.end()
Ejemplo n.º 26
0
    
# comet.ml experiment information
# NOTE(review): api_key / project_name / workspace are left empty and must
# be filled in (or read from config) before comet tracking can work.
if args.comet_track:
    experiment = Experiment(api_key="",
                        project_name="", 
                        workspace="",
                        auto_output_logging="simple",
                        log_git_metadata=False,
                        log_git_patch=False)
    # Experiment.alive is False when the connection could not be established
    if experiment.alive is False:
        raise Exception("Could not connect to comet.ml!")
    # disable automatic logging
    # experiment.auto_param_logging=False
    # log all CLI args above as experiment parameters
    experiment.log_parameters(args.__dict__)
    experiment.add_tags([args.task, args.model])   
else:
    # pseudoclass to enable "with experiment.train()" when not using comet
    experiment = voidExperiment() 

# GloVe has 4 valid embedding dimensions, assert that one of them is chosen
if not args.no_glove:
    assert args.dim_emb in [50, 100, 200, 300], 'Choose valid GloVe dimension'

print('\nEvaluating incremental outputs of task: {}, with model {}.\n'.format(
                                                        args.task, args.model))

# sequence-to-sequence unless the task is sequence-to-label; CRF output
# layers only make sense for token-level (seq2seq) prediction
seq2seq=True
if args.task in seq2label_tasks:
    assert args.model not in ['lstm_crf', 'bilstm_crf'], 'CRF cannot be used with seq2label task'
    seq2seq = False 
Ejemplo n.º 27
0
def main():
    """Run the Mol-HIV graph-classification experiment.

    Builds the requested GNN, optionally pretrains it (transfer /
    self-transfer variants, possibly with damaged weights), then trains on
    the Mol-HIV target split for ``--runs`` repetitions while logging each
    run to comet.ml.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', type=str, default='gcn')
    parser.add_argument('--type', type=str, default='base')
    parser.add_argument('--runs', type=int, default=10)
    parser.add_argument('--epochs', type=int, default=200)
    parser.add_argument('--lr', type=float, default=0.001)
    parser.add_argument('--hidden_dim', type=int, default=300)
    parser.add_argument('--num_layers', type=int, default=5)

    args = parser.parse_args()

    # Validate CLI input with explicit exceptions rather than `assert`,
    # which is silently stripped when Python runs with -O.
    if args.model not in ('gcn', 'sage', 'gin'):
        raise ValueError("--model must be one of 'gcn', 'sage', 'gin'")
    if args.type not in ('base', 'transfer', 'transfer-damaged',
                         'self-transfer', 'self-transfer-damaged'):
        raise ValueError('unknown experiment type: {}'.format(args.type))
    if args.runs < 1:
        raise ValueError('--runs must be >= 1')
    if args.epochs < 1:
        raise ValueError('--epochs must be >= 1')
    if args.lr <= 0:
        raise ValueError('--lr must be > 0')
    if args.hidden_dim <= 0:
        raise ValueError('--hidden_dim must be > 0')
    if args.num_layers <= 0:
        raise ValueError('--num_layers must be > 0')

    # ---------------------------------------------------
    # MODEL
    # ---------------------------------------------------
    model = networks[args.model](in_channels=dataset.num_features,
                                 hidden_channels=args.hidden_dim,
                                 out_channels=dataset.num_tasks,
                                 num_conv_layers=args.num_layers).to(device)

    # ---------------------------------------------------
    #  EXPERIMENT DETAILS
    # ---------------------------------------------------
    print('Graph Classification Experiment')
    print('Mol_HIV task')
    print(exp_description[args.type])
    print('----------------------------------------------')
    print('Model: {}'.format(args.model))
    print('Number of runs: {}'.format(args.runs))
    print('Number of epochs: {}'.format(args.epochs))
    print('Learning rate: {}'.format(args.lr))
    print()
    print(model)

    # ---------------------------------------------------
    # EXPERIMENT LOOP
    # ---------------------------------------------------
    for run in range(args.runs):
        print()
        print('Run #{}'.format(run + 1))

        # Model initialisation: either a fresh model or a pretrained one.
        if args.type == 'base':
            model.reset_parameters()

        elif args.type in ['transfer', 'transfer-damaged']:
            # Pretrain on Mol-BBBP, then reload the best checkpoint
            model.reset_parameters()
            bbbp_optimiser = optim.Adam(model.parameters(), lr=0.001)
            to_damage = args.type == 'transfer-damaged'

            print('Pretraining model on Mol-BBBP...')
            best_val_acc = pretrain_molbbbp(model,
                                            device,
                                            bbbp_evaluator,
                                            bbbp_optimiser,
                                            args.model,
                                            damage=to_damage)
            print('Validation accuracy: {:.3}'.format(best_val_acc))

            model.load_state_dict(
                torch.load('molbbbp_models/{}_molbbbp.pth'.format(args.model)))

        elif args.type in ['self-transfer', 'self-transfer-damaged']:
            # Pretrain on the Mol-HIV source split, then reload checkpoint
            model.reset_parameters()
            source_optimiser = optim.Adam(model.parameters(), lr=0.001)
            to_damage = args.type == 'self-transfer-damaged'

            print('Pretraining model on Mol-HIV Source Task...')
            best_val_acc = pretrain_source_molhiv(model,
                                                  device,
                                                  evaluator,
                                                  source_optimiser,
                                                  args.model,
                                                  damage=to_damage)
            print('Validation accuracy: {:.3}'.format(best_val_acc))

            model.load_state_dict(
                torch.load('molhiv/{}_source_molhiv.pth'.format(args.model)))

        # Comet Experiment (one per run)
        experiment = Experiment(project_name='graph-classification',
                                display_summary_level=0,
                                auto_metric_logging=False)
        experiment.add_tags([args.model, args.type])
        experiment.log_parameters({
            'hidden_dim': args.hidden_dim,
            'num_features': dataset.num_features,
            'num_classes': dataset.num_tasks,
            'learning_rate': args.lr,
            'num_epochs': args.epochs,
        })

        # Mol-HIV Target Training
        print('Training on Mol-HIV')
        optimizer = optim.Adam(model.parameters(), args.lr)

        for epoch in tqdm(range(args.epochs)):
            train_loss = train(model, device, target_loader, optimizer)
            # NOTE: `eval` here is a project-level evaluation helper that
            # shadows the builtin, not Python's eval().
            train_performance = eval(model, device, target_loader, evaluator)

            experiment.log_metric('train_loss', train_loss.item(), step=epoch)
            experiment.log_metric('train_roc-auc',
                                  train_performance[dataset.eval_metric],
                                  step=epoch)

        experiment.end()
Ejemplo n.º 28
0
    'minibatch_size': minibatch_size,
    'lr': learning_rate,
    'discount_factor': discount_factor,
    'random_process_theta': random_process_args['theta'],
    'log_interval_steps': log_interval_steps,
    'train_data_shape': train_data_df.shape,
    'test_data_shape': test_data_df.shape,
    'dataset_name': dataset_name,
    'device_type': device_type
}

print('Running with params: %s' % str(params))

if log_comet:
    experiment.log_parameters(params)
    experiment.add_tags(comet_tags)
    if plot_stocks:
        experiment.log_image('train_stocks_plot.png', 'train_window_stocks')
        if test_stocks_plot_fig is not None:
            experiment.log_image('test_stocks_plot.png', 'test_window_stocks')

num_stocks = train_data_df.shape[1]
num_states_and_actions = num_stocks

# init DDPG agent
agent = DDPG(num_states_and_actions,
             num_states_and_actions,
             minibatch_size,
             random_process_args,
             learning_rate=learning_rate,
             discount_factor=discount_factor,
# Score the detector on the test set: average precision, the best-F1 point
# on the precision/recall curve, and the threshold achieving it.
ap, f1_max, precision, recall, f1_max_th, fig_pre_rec, fig_th_pre_rec = precision_recall(y_test, 
                                                       score_test,
                                                       limit=1000,
                                                       label_anomaly=labels[0])

#fig_score_train = plot_score(score_train, 'train')
#fig_score_val = plot_score(score_val, 'validation')
fig_score_test = plot_score(score_test, 'test', 10, labels=y_test, th=f1_max_th)

# cumulative explained-variance plot of the fitted PCA
fig_cumsum = pca.plot_cumsum()

# binarize scores at the best-F1 threshold and build the confusion matrix
y_pred, conf_matrix = predict(score_test, f1_max_th, y_test, labels)


# log everything for this run to comet.ml
experiment.add_tags([data, metric])
parameters = {'var': var, 'pc': pca.pcs_, 'metric': metric}
experiment.log_parameters(parameters)
experiment.log_metric('ap', ap)
experiment.log_metric('f1', f1_max)
experiment.log_metric('precision', precision)
experiment.log_metric('recall', recall)
experiment.log_metric('train_time', pca.time_)
experiment.log_parameter('th_f1', f1_max_th)
experiment.log_figure('cumsum', fig_cumsum)
experiment.log_figure('score_test',fig_score_test)
experiment.log_figure('precision_recall',fig_pre_rec)
experiment.log_figure('th_pre_rec_f1', fig_th_pre_rec)
experiment.log_confusion_matrix(matrix=conf_matrix, labels=labels)

experiment.end()
Ejemplo n.º 30
0
    evaluation_iterator = pieces.validation_iterator or pieces.iterator
    evaluation_dataset = pieces.test_dataset

else:
    raise ConfigurationError("Need to use am-trainer.")

params.assert_empty('base train command')

if args.comet is not None:
    experiment = Experiment(api_key=args.comet,
                            workspace=args.workspace,
                            project_name=args.project,
                            parse_args=False,
                            auto_output_logging=None)
    if args.tags:
        experiment.add_tags(args.tags)
    with open(args.param_path) as fil:
        code = "".join(fil.readlines())
    code += "\n\n#=============Full details=============\n\n"
    code += _jsonnet.evaluate_file(args.param_path)
    code += "\n\n#=============IMPORTANT: overwritten options============\n\n"
    code += args.overrides
    experiment.set_code(code)
    code_data = json.loads(_jsonnet.evaluate_file(args.param_path))
    experiment.log_parameter(
        "bert", "bert" in code_data["dataset_reader"]["token_indexers"])
    experiment.log_parameter(
        "elmo", "elmo" in code_data["dataset_reader"]["token_indexers"])
    experiment.log_parameter("model_directory", serialization_dir)
    experiment.log_parameter("cuda_device", cuda_device)
    experiment.log_parameter("corpora", code_data["iterator"]["formalisms"])