Example #1
0
class CometLogger():
    """Log an experiment to Comet ML and persist it locally.

    Supports creating a fresh experiment or resuming a previously
    stopped one via its experiment key.
    """

    def __init__(self, enabled, is_existing=False, prev_exp_key=None):
        """Create or resume the underlying Comet experiment.

        Args:
            enabled: when False, the Comet experiment is created in
                disabled mode (nothing is sent to the server).
            is_existing: resume a previously created experiment instead
                of starting a new one.
            prev_exp_key: experiment key to resume; required when
                is_existing is True.

        Raises:
            ValueError: if is_existing is True but no prev_exp_key is given.
        """
        disabled = not enabled

        if not is_existing:
            self.experiment = Experiment(api_key=COMET_API_KEY,
                                         workspace=COMET_WORKSPACE,
                                         project_name=PROJECT_NAME,
                                         disabled=disabled)
        else:
            if prev_exp_key is None:
                raise ValueError(
                    "Requested existing experiment, but no key provided")
            print("Continuing existing experiment with key: ", prev_exp_key)
            self.experiment = ExistingExperiment(
                api_key=COMET_API_KEY,
                workspace=COMET_WORKSPACE,
                project_name=PROJECT_NAME,
                disabled=disabled,
                previous_experiment=prev_exp_key)
        # Kept so logging helpers below can short-circuit without a server.
        self.disabled = disabled

    def get_experiment_key(self):
        """Return the first 9 characters of the Comet experiment key."""
        return self.experiment.get_key()[:9]

    def add_tag(self, tag):
        """Attach a tag to the experiment."""
        self.experiment.add_tag(tag)

    def log_metric(self, name, value, step=None):
        """Log a single scalar metric, optionally at a given step."""
        self.experiment.log_metric(name, value, step=step)

    def log_metrics(self, metrics_dict, prefix, step=None):
        """Log a dict of metrics under a common name prefix."""
        self.experiment.log_metrics(metrics_dict, prefix=prefix, step=step)

    def log_params(self, params_dict):
        """Log a dict of hyperparameters."""
        self.experiment.log_parameters(params_dict)

    def set_name(self, name_str):
        """Set the display name of the experiment."""
        self.experiment.set_name(name_str)

    # The annotation is kept as a string so the class can be defined even if
    # SpeakerVerificationDataset is not in scope at class-definition time.
    def log_dataset(self, dataset: "SpeakerVerificationDataset"):
        """Render a short HTML summary of the dataset and display it."""
        if self.disabled:
            return
        dataset_string = ""
        dataset_string += "<b>Speakers</b>: %s\n" % len(dataset.speakers)
        dataset_string += "\n" + dataset.get_logs()
        dataset_string = dataset_string.replace("\n", "<br>")
        # NOTE(review): self.vis is never assigned anywhere in this class, so
        # this call raises AttributeError — looks like leftover visdom code.
        self.vis.text(dataset_string, opts={"title": "Dataset"})

    def log_implementation(self, params):
        """Render the implementation/config params as HTML and display them."""
        if self.disabled:
            return
        # Build one "<b>key</b>: value" line per param. Doing the
        # newline -> <br> substitution once at the end produces the same
        # final string as the original's per-iteration replace, without
        # re-scanning the accumulated string every pass.
        implementation_string = "".join(
            "<b>%s</b>: %s\n" % (param, value)
            for param, value in params.items())
        implementation_string = implementation_string.replace("\n", "<br>")
        self.implementation_string = implementation_string
        # NOTE(review): self.vis is never assigned in this class (see
        # log_dataset) — presumably leftover visdom code; confirm.
        self.implementation_win = self.vis.text(
            implementation_string, opts={"title": "Training implementation"})

    def draw_projections(self,
                         embeds,
                         utterances_per_speaker,
                         step,
                         out_fpath=None,
                         max_speakers=16):
        """UMAP-project speaker embeddings, plot them, and log the image.

        Args:
            embeds: 2-D array of utterance embeddings, assumed grouped
                speaker-by-speaker — TODO confirm with caller.
            utterances_per_speaker: utterances per speaker in `embeds`.
            step: training step used in the plot title and Comet log.
            out_fpath: where to save the figure; when None nothing is
                saved or logged.
            max_speakers: cap on the number of speakers drawn.
        """
        if self.disabled:
            return
        # Cannot draw more speakers than there are distinct colors.
        max_speakers = min(max_speakers, len(colormap))
        embeds = embeds[:max_speakers * utterances_per_speaker]

        n_speakers = len(embeds) // utterances_per_speaker
        ground_truth = np.repeat(np.arange(n_speakers), utterances_per_speaker)
        colors = [colormap[i] for i in ground_truth]

        reducer = umap.UMAP()
        projected = reducer.fit_transform(embeds)
        plt.scatter(projected[:, 0], projected[:, 1], c=colors)
        plt.gca().set_aspect("equal", "datalim")
        plt.title("UMAP projection (step %d)" % step)
        if out_fpath is not None:
            plt.savefig(out_fpath)
        plt.clf()
        # Fix: only log when an image file was actually written; the
        # original called log_image(None) when out_fpath was omitted.
        if out_fpath is not None:
            self.experiment.log_image(out_fpath, step=step)
    
    # NOTE(review): orphaned training-loop fragment — it sits at class-body
    # indentation but references names (running_loss, total, correct, epoch,
    # experiment, scheduler, early_stopping, model, optimizer, best_acc,
    # train_indices, validation_indices, save_checkpoint) defined nowhere in
    # this file. It appears to be the tail of an epoch validation loop pasted
    # here by mistake; relocate into the training script.
    # Average loss and accuracy over the validation pass.
    loss = running_loss / total
    acc = 100 * float(correct) / total
    # Log validation metrics under Comet's "validate" context when a Comet
    # experiment object is available.
    if experiment:
        with experiment.validate():
            experiment.log_metric('loss', loss, step=epoch)
            experiment.log_metric('acc', acc, step=epoch)
    print('VALIDATION - Epoch: %d, Loss: %.3f, Accuracy: %.3f' % (
        epoch, loss, acc))
    print('Epoch Time: %.1f' % (time.time()-running_time))
    print()

    # Drive LR scheduling and early stopping off the validation loss.
    scheduler.step(loss)
    early_stopping.step(loss)

    # Track the best accuracy seen so far and checkpoint everything needed
    # to resume training (model/optimizer/scheduler state plus data splits).
    is_best = acc > best_acc
    best_acc = max(acc, best_acc)
    checkpoint = {
        'epoch': epoch + 1,
        'best_acc': best_acc,
        'train_indices': train_indices,
        'validation_indices': validation_indices,
        'state_dict': model.state_dict(),
        'optimizer': optimizer.state_dict(),
        'scheduler': scheduler.state_dict()}
    # Store the Comet key so a resumed run can re-attach to the experiment.
    if experiment:
        checkpoint['experiment_key'] = experiment.get_key()
    save_checkpoint(checkpoint, is_best)

# End-of-run marker for the (misplaced) training fragment above.
print('Finished Training')
class CometConnection:
    """Wrapper around a Comet experiment: creation, resumption, and
    persistence of the experiment key to the local file system.
    """

    def __init__(self, comet_name=None, dataset_config=None, exp_key=None):
        """Create a new experiment, resume one, or stay disconnected.

        Args:
            comet_name: display name for a new experiment (requires
                dataset_config as well).
            dataset_config: dataset parameters to log on a new experiment.
            exp_key: key of an existing experiment to resume; used only
                when comet_name/dataset_config are not both given.

        With no arguments, self.experiment stays None (offline mode).
        """
        self.experiment = None

        if comet_name is not None and dataset_config is not None:
            self._init_new_experiment(comet_name, dataset_config)
        elif exp_key is not None:
            self._init_continue_experiment(exp_key)

    def _init_new_experiment(self, comet_name, dataset_config):
        """Start a fresh experiment: name it, log the dataset config, and
        attach the MATLAB generator script as an asset."""
        self.experiment = Experiment(api_key=COMET_KEY,
                                     project_name=PROJECT_NAME)
        self.experiment.set_name(comet_name)
        self.log_data_attributes(dataset_config)
        self.experiment.log_asset('datagen/spectra_generator.m')

    def _init_continue_experiment(self, exp_key):
        """Re-attach to an existing experiment by key."""
        self.experiment = ExistingExperiment(api_key=COMET_KEY,
                                             previous_experiment=exp_key)

    def serialize(self):
        """Return a dict with the state needed to re-attach later."""
        params = dict()
        params["comet_exp_key"] = self.experiment.get_key()

        return params

    def save(self, save_dir):
        """Write the serialized experiment info as JSON under save_dir."""
        info_dict = self.serialize()
        # Fix: the original passed open(...) directly to json.dump and
        # leaked the file handle; the with-block closes it deterministically.
        with open(os.path.join(save_dir, COMET_SAVE_FILENAME), "w") as f:
            json.dump(info_dict, f)

    def persist(self, config_path):
        """Re-attach this instance to the experiment recorded at config_path."""
        # Fix: close the config file handle (original leaked it).
        with open(config_path, 'r') as f:
            info = json.load(f)
        # Re-runs __init__ in "continue" mode with the stored key.
        self.__init__(exp_key=info["comet_exp_key"])

    def log_data_attributes(self, dataset_config):
        """Log every dataset-config entry as a SPECTRUM_-prefixed parameter."""
        for key, value in dataset_config.items():
            self.experiment.log_parameter("SPECTRUM_" + key, value)

    def log_imgs(self, dataset_name):
        """Best-effort upload of the dataset's image folder."""
        # Fix: a bare `except:` also swallowed KeyboardInterrupt/SystemExit;
        # narrow it to Exception while keeping the best-effort behavior.
        try:
            imgs_dir = os.path.join(DATA_DIR, dataset_name, 'imgs')
            self.experiment.log_asset_folder(imgs_dir)
        except Exception:
            print(f"No images found for dataset: {dataset_name}")

    def log_script(self, dataset_config):
        """Best-effort upload of the MATLAB generator script."""
        script_name = dataset_config['matlab_script']
        # Fix: narrow the bare `except:` (see log_imgs).
        try:
            matlab_dir = os.path.join(GEN_DIR, script_name)
            self.experiment.log_asset(matlab_dir)
        except Exception:
            print(f"Could not find {script_name} under {GEN_DIR}.")

    def format_classification_report(self, classification_report):
        """Flatten sklearn's dict-form classification report into
        '<label>_test_<metric>' keys suitable for log_metrics."""
        return {
            f'{k}_test_{metric}': metric_val
            for k, v in classification_report.items()
            for metric, metric_val in v.items()
        }

    def get_classification_report(self, y_test, preds):
        """Compute a per-peak-count classification report and log it.

        Args:
            y_test: one-hot ground-truth array, shape (n_samples, n_classes).
            preds: predicted class scores, same shape as y_test.

        Returns:
            The dict-form sklearn classification report.
        """
        preds_formatted = np.argmax(preds, axis=1)
        test_formatted = np.argmax(y_test, axis=1)
        # Class i corresponds to (i + 1) peaks in the spectrum.
        peak_labels = [
            f"n_peaks_{1 + num_peak}" for num_peak in range(y_test.shape[1])
        ]
        classif_report = classification_report(test_formatted,
                                               preds_formatted,
                                               target_names=peak_labels,
                                               output_dict=True)
        # Human-readable variant, logged as text alongside the metrics.
        classif_report_str = classification_report(test_formatted,
                                                   preds_formatted,
                                                   target_names=peak_labels)

        if self.experiment is not None:
            formatted = self.format_classification_report(classif_report)
            self.experiment.log_metrics(formatted)
            self.experiment.log_text(classif_report_str)

        return classif_report
class CometLogger():
    """Log an experiment to Comet ML and persist it locally.

    Supports creating a fresh experiment or resuming a previously
    stopped one via its experiment key.
    """

    def __init__(self, disabled, is_existing=False, prev_exp_key=None):
        """Create or resume the underlying Comet experiment.

        Args:
            disabled: create the experiment in disabled mode (nothing is
                sent to the server).
            is_existing: resume a previously created experiment instead of
                starting a new one.
            prev_exp_key: experiment key to resume; required when
                is_existing is True.

        Raises:
            ValueError: if is_existing is True but no prev_exp_key is given.
        """

        if not is_existing:
            self.experiment = Experiment(api_key=COMET_API_KEY,
                                         workspace=COMET_WORKSPACE,
                                         project_name=PROJECT_NAME,
                                         disabled=disabled)
        else:
            if prev_exp_key is None:
                raise ValueError("Requested existing experiment, but no key provided")
            print("Continuing existing experiment with key: ", prev_exp_key)
            self.experiment = ExistingExperiment(api_key=COMET_API_KEY,
                                                 workspace=COMET_WORKSPACE,
                                                 project_name=PROJECT_NAME,
                                                 disabled=disabled,
                                                 previous_experiment=prev_exp_key)
        self.disabled = disabled
        # Display name; set via set_name() and used to derive the local
        # record filename in save_act_grads().
        self.name = None

    def get_experiment_key(self):
        """Return the first 9 characters of the Comet experiment key."""
        return self.experiment.get_key()[:9]

    def add_tag(self, tag):
        """Attach a tag to the experiment."""
        self.experiment.add_tag(tag)

    def log_metric(self, name, value, step=None):
        """Log a single scalar metric, optionally at a given step."""
        self.experiment.log_metric(name, value, step=step)

    def log_metrics(self, metrics_dict, prefix, step=None):
        """Log a dict of metrics under a common name prefix."""
        self.experiment.log_metrics(metrics_dict, prefix=prefix, step=step)

    def log_params(self, params_dict):
        """Log a dict of hyperparameters."""
        self.experiment.log_parameters(params_dict)

    def set_name(self, name_str):
        """Set the experiment display name and remember it locally."""
        self.experiment.set_name(name_str)
        self.name = name_str

    def save_act_grads(self, log_dict):
        """Save a dictionary of activation/gradients records to disk."""
        assert isinstance(log_dict, dict)
        if self.name is None:
            warnings.warn("Experiment name not set, not saving")
            return

        # Hidden file in the current directory, keyed by experiment name.
        file_name = f"./.{self.name}.record"
        with open(file_name, 'wb') as f:
            pickle.dump(log_dict, f)

    # TODO: need to rewrite before can be used for MNIST.
    def draw_projections(self, embeds, utterances_per_speaker, step, out_fpath=None,
                         max_speakers=16):
        """UMAP-project speaker embeddings, plot them, and log the image.

        Args:
            embeds: 2-D array of utterance embeddings, assumed grouped
                speaker-by-speaker — TODO confirm with caller.
            utterances_per_speaker: utterances per speaker in `embeds`.
            step: training step used in the plot title and Comet log.
            out_fpath: where to save the figure; when None nothing is
                saved or logged.
            max_speakers: cap on the number of speakers drawn.
        """
        if self.disabled:
            return
        # Fix: import umap only when actually drawing — the original
        # imported it before the disabled check, requiring the dependency
        # even when logging was turned off.
        import umap
        # Cannot draw more speakers than there are distinct colors.
        max_speakers = min(max_speakers, len(colormap))
        embeds = embeds[:max_speakers * utterances_per_speaker]

        n_speakers = len(embeds) // utterances_per_speaker
        ground_truth = np.repeat(np.arange(n_speakers), utterances_per_speaker)
        colors = [colormap[i] for i in ground_truth]

        reducer = umap.UMAP()
        projected = reducer.fit_transform(embeds)
        plt.scatter(projected[:, 0], projected[:, 1], c=colors)
        plt.gca().set_aspect("equal", "datalim")
        plt.title("UMAP projection (step %d)" % step)
        if out_fpath is not None:
            plt.savefig(out_fpath)
        plt.clf()
        # Fix: only log when an image file was actually written; the
        # original called log_image(None) when out_fpath was omitted.
        if out_fpath is not None:
            self.experiment.log_image(out_fpath, step=step)
Example #5
0
    def __init__(
        self,
        api_key=None,
        parameters=None,
        experiment_key=None,
        artifacts_directory=None,
        credentials_path=None,
    ):
        """Create or resume a Comet experiment and set up its artifact folder.

        Args:
            api_key: Comet API key; falls back to the COMET_API_KEY env var.
            parameters: (possibly nested) run configuration; required unless
                experiment_key is given.
            experiment_key: key of an existing experiment to update; its
                stored parameters are deep-merged with `parameters`.
            artifacts_directory: root folder for run outputs; falls back to
                the ARTIFACTS_DIRECTORY env var.
            credentials_path: Google credentials path; falls back to the
                PATH_TO_GOOGLE_CREDENTIALS env var.

        Raises:
            ValueError: if neither parameters nor experiment_key is provided.
        """
        # NOTE(review): if the env var is also unset this stays None and the
        # os.path.join below raises — confirm ARTIFACTS_DIRECTORY is always set.
        artifacts_directory = (artifacts_directory if artifacts_directory else
                               os.getenv('ARTIFACTS_DIRECTORY'))

        api_key = api_key if api_key else os.getenv('COMET_API_KEY')

        # Fix: the original wrapped this expression in `{...}`, storing a
        # one-element *set* instead of the path value itself, which then got
        # passed to init_gsheet().
        self.credentials_path = (
            credentials_path
            if credentials_path else os.getenv('PATH_TO_GOOGLE_CREDENTIALS'))

        parameters = parameters if parameters else {}

        if not parameters and not experiment_key:
            raise ValueError(
                'Either parameters or experiment_key must be provided!')

        if experiment_key is not None:
            # Resume: merge the stored parameters with any new overrides.
            logging.info(f"Updating existing experiment: {experiment_key}")
            experiment = ExistingExperiment(api_key=api_key,
                                            previous_experiment=experiment_key)
            current_parameters = self.load_experiment_parameters(
                experiment_key, artifacts_directory)
            parameters = deep_update(current_parameters, parameters)
        else:
            experiment = Experiment(
                api_key=api_key,
                project_name=parameters['info']['project_name'])

        self.experiment_key = experiment.get_key()
        self.experiment_url = experiment._get_experiment_url()
        self.experiment = experiment
        # Snapshot the training script source on the experiment, if present.
        if 'filename' in parameters:
            if os.path.exists(parameters['filename']):
                with open(parameters['filename'], 'r') as f:
                    data = f.read()
                self.experiment.set_code(data)
        self.runner = DockerRunner()
        self.parameters = parameters

        # Comet only accepts flat parameter dicts.
        flattened_parameters = flatten(parameters)
        self.experiment.log_parameters(flattened_parameters)

        self.output_folder = os.path.join(artifacts_directory,
                                          self.experiment_key)
        # (Dropped a redundant os.path.join() wrapper around the path.)
        os.makedirs(self.output_folder, exist_ok=True)

        # Persist the merged config next to the run's artifacts.
        fpath = os.path.join(self.output_folder, 'config.yml')
        with open(fpath, 'w') as f:
            yaml.safe_dump(parameters, f)

        self.init_gsheet(self.credentials_path)
        logging.info(
            f"Set up {self.experiment_key} - {self.parameters['info']['notes']}"
        )
        # NOTE(review): ending the experiment inside __init__ looks deliberate
        # (setup-only connection?) — confirm before reusing this class.
        self.experiment.end()