Example 1
def plot_truth_vs_prediction(
    y_pred: Union[JOINTS_25D, JOINTS_3D],
    y_true: Union[JOINTS_25D, JOINTS_3D],
    image: torch.Tensor,
    experiment: Experiment,
):
    """Generates the graphics with input image, predicetd labels and the ground truth.

    Args:
        y_pred (Union[JOINTS_25D, JOINTS_3D]): Model output as a tensor of shape (21 x 3).
        y_true (Union[JOINTS_25D, JOINTS_3D]): Ground-truth joints of shape (21 x 3).
        image (torch.Tensor): Input image to the model.
        experiment (Experiment): Comet ml experiment object.
    """
    img = cv2.cvtColor(np.array(transforms.ToPILImage()(image)),
                       cv2.COLOR_BGR2RGB)
    # numpy image arrays are (height, width, channels)
    height, width, _ = img.shape
    fig = plt.figure(figsize=(10, 10))
    ax1 = fig.add_subplot(121)
    plt.imshow(img)
    plot_hand(ax1, y_true)
    ax1.title.set_text("True joints")
    ax2 = fig.add_subplot(122)
    plot_hand(ax2, y_true, alpha=0.2, linestyle=":")
    plot_hand(ax2, y_pred)
    ax2.set_xlim([0, width])
    ax2.set_ylim([height, 0])
    ax2.title.set_text("Predicted joints")
    if experiment is not None:
        experiment.log_figure(figure=plt)
    plt.close()
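A quick way to smoke-test this helper (a sketch: the dummy tensors and experiment=None are assumptions, and plot_hand must be importable from the same project):

import torch

dummy_image = torch.rand(3, 224, 224)   # fake input image tensor
dummy_joints = torch.rand(21, 3) * 224  # fake 21-keypoint annotations
plot_truth_vs_prediction(dummy_joints, dummy_joints.clone(), dummy_image, experiment=None)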
Example 2
def train(path):
    name = os.path.splitext(os.path.basename(path))[0]
    print('Processing: ', name)
    features = pd.read_csv(path, index_col=None)
    selected_features_names = [name for name, desc in selected_features]
    features = features[selected_features_names]
    split_idx = 1200
    features = features.drop(['sound.files'], axis=1)
    noise_only_df, df = features.iloc[:split_idx], features.iloc[split_idx:]
    y = df.pop('petrel')
    X = df.values
    y_noise = noise_only_df.pop('petrel')
    X_noise = noise_only_df.values
    X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.25, random_state=42, stratify=y)
    hyperparams = {
        'n_estimators': [100, 300, 500, 1000],
        'learning_rate': [0.1],
        'gamma': [0.0, 0.5],
        'max_depth': [2, 3, 4],
        'min_child_weight': [1, 2],
        'subsample': [1.0, 0.8],
        'reg_alpha': [0.0, 0.1],
        'reg_lambda': [1, 2, 3]
    }
    #
    # hyperparams = {
    #     'n_estimators': [100],
    #     'learning_rate': [0.1],
    #     'gamma': [0.0],
    #     'max_depth': [2],
    #     'min_child_weight': [1],
    #     'subsample': [1.0],
    #     'reg_alpha': [0.0],
    #     'reg_lambda': [1]
    # }

    clf = model_selection.GridSearchCV(estimator=xg.XGBClassifier(objective='binary:logistic', n_jobs=-1),
                                       param_grid=hyperparams,
                                       cv=4)
    search = clf.fit(X_train, y_train)
    estimator = search.best_estimator_
    joblib.dump(estimator, name + '_model.pkl')

    test_pred = estimator.predict(X_test)
    metrics = calculate_metrics(test_pred, y_test)

    noise_pred = estimator.predict(X_noise)
    noise_detection_accuracy = accuracy_score(y_noise, noise_pred)

    experiment = Experiment(api_key="4PdGdUZmGf6P8QsMa5F2zB4Ui",
                            project_name="storm petrels",
                            workspace="tracewsl")
    experiment.set_name(name)
    experiment.log_parameter('name', name)
    # log_multiple_params/log_multiple_metrics are long-deprecated comet_ml APIs
    experiment.log_parameters(search.best_params_)
    experiment.log_metrics(metrics)
    experiment.log_metric('Noise detection accuracy', noise_detection_accuracy)
    experiment.log_figure('Confusion matrix', get_confusion_matrix_figure(test_pred, y_test))
    experiment.log_figure('Feature importance', get_feature_importance_figure(estimator, list(df.columns.values)))
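calculate_metrics, get_confusion_matrix_figure, and get_feature_importance_figure are project helpers not shown here. A plausible stand-in for calculate_metrics, assuming it returns a name-to-value dict that experiment.log_metrics() can consume:

from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

def calculate_metrics(y_pred, y_true):
    # hypothetical sketch of the missing helper
    return {
        'accuracy': accuracy_score(y_true, y_pred),
        'precision': precision_score(y_true, y_pred),
        'recall': recall_score(y_true, y_pred),
        'f1': f1_score(y_true, y_pred),
    }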
Example 3
def plot_simclr_images(img1: torch.Tensor, img2: torch.Tensor,
                       comet_logger: Experiment):
    fig = plt.figure(figsize=(10, 10))
    ax = fig.add_subplot(121)
    plt.imshow(
        cv2.cvtColor(np.array(transforms.ToPILImage()(img1.cpu())),
                     cv2.COLOR_BGR2RGB))
    ax.set_title("Image 1")
    ax = fig.add_subplot(122)
    plt.imshow(
        cv2.cvtColor(np.array(transforms.ToPILImage()(img2.cpu())),
                     cv2.COLOR_BGR2RGB))
    ax.set_title("Image 2")
    if comet_logger is not None:
        comet_logger.log_figure(figure=plt)
    plt.close()
Example 4
 def log(self, experiment=None):
     ''' Export all logs in the Comet.ml environment.
         See https://www.comet.ml/ for more details
     '''
     
     # Initialize the Comet.ml experiment (naming, tags) for automatic logging
     project_name = 'Optimization' if self.comet_optimize else 'Summary'
     experiment_name = '{} - {} '.format(self.model_name, str(self.batch_size)) + ('ES+' if self.train_after_es else '')
     experiment_tags = [ self.model_name, self.monitor_val ] + (['ES+'] if self.train_after_es else []) +  (['Pre-train'] if self.pretraining else [])
     
     if experiment is None:
         experiment = Experiment(api_key='cSZq9kuH2I87ezvm2dEWTx6op', project_name=project_name, log_code=False, auto_param_logging=False, auto_metric_logging=False)
     experiment.set_name(experiment_name)
     experiment.add_tags(experiment_tags)
     
     # Export hyperparameters
     experiment.log_parameters(self.dataloader_params)
     experiment.log_parameters(self.training_params)   
     
     # Export metrics values
     experiment.log_metrics({'Average accuracy' : np.mean(self.test_score['accuracy']), 'Std accuracy' : np.std(self.test_score['accuracy'])})
     
     # Export metrics graphs for each pilot (accuracy, loss, confusion matrix)
     [ experiment.log_figure(figure_name='Confusion matrix {}'.format(pilot_idx), figure=plot_cm(self.conf_matrices, pilot_idx)) for pilot_idx in range(1,self.n_pilots+1)]
     [ experiment.log_figure(figure_name='Loss pilot {}'.format(pilot_idx), figure=plot_loss(self.histories[pilot_idx-1], pilot_idx)) for pilot_idx in range(1,self.n_pilots+1)]
     
     fig, ax = plt.subplots(figsize=(10,6))
     plot_full_barchart(self.test_score, n_pilots=self.n_pilots, title=' {} ConvNet model'.format(self.model_name), fig=fig)
     experiment.log_figure(figure_name='Accuracy barchart', figure=fig)
     
     if self.train_after_es:
         [ experiment.log_figure(figure_name='Loss pilot {} (ES+)'.format(pilot_idx), figure=plot_loss(self.histories_es[pilot_idx-1], pilot_idx)) for pilot_idx in range(1,self.n_pilots+1)]
     
     # Export model weights for each pilot
     [ experiment.log_asset('{}{}.h5'.format(self.weights_savename_prefix, pilot_idx)) for pilot_idx in range(1,self.n_pilots+1)]
     experiment.end()
Example 5
import matplotlib as mpl
mpl.use('TkAgg')
import matplotlib.pyplot as plt
import numpy as np
from comet_ml import Experiment

experiment = Experiment(api_key="API_KEY",
                        project_name='matplotlib-demos',
                        auto_param_logging=False)

t = np.arange(0.0, 2.0, 0.01)
s = 1 + np.sin(2*np.pi*t)
plt.plot(t, s)

plt.xlabel('time (s)')
plt.ylabel('voltage (mV)')
plt.title('About as simple as it gets, folks')
plt.grid(True)

experiment.log_figure(figure=plt)
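This demo never closes the run explicitly; comet_ml flushes when the interpreter exits, but in long-running scripts it is safer to end the experiment yourself:

experiment.end()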
Example 6
class Logger:
    """
    Logs/plots results to comet.

    Args:
        exp_config (dict): experiment configuration hyperparameters
        model_config (dict): model configuration hyperparameters
        data_config (dict): data configuration hyperparameters
    """
    def __init__(self, exp_config, model_config, data_config):
        self.exp_config = exp_config
        self.experiment = Experiment(**exp_config['comet_config'])
        self.experiment.disable_mp()
        self._log_hyper_params(exp_config, model_config, data_config)
        self._epoch = 0

    def _log_hyper_params(self, exp_config, model_config, data_config):
        """
        Log the hyper-parameters for the experiment.

        Args:
            exp_config (dict): experiment configuration hyperparameters
            model_config (dict): model configuration hyperparameters
            data_config (dict): data configuration hyperparameters
        """
        def flatten_arg_dict(arg_dict):
            flat_dict = {}
            for k, v in arg_dict.items():
                if isinstance(v, dict):
                    flat_v = flatten_arg_dict(v)
                    for kk, vv in flat_v.items():
                        flat_dict[k + '_' + kk] = vv
                else:
                    flat_dict[k] = v
            return flat_dict

        self.experiment.log_parameters(flatten_arg_dict(exp_config))
        self.experiment.log_parameters(flatten_arg_dict(model_config))
        self.experiment.log_parameters(flatten_arg_dict(data_config))

    def log(self, results, train_val):
        """
        Plot the results in comet.

        Args:
            results (tuple): (objectives, grads, params, images, metrics) dicts to plot
            train_val (str): either 'train' or 'val'
        """
        objectives, grads, params, images, metrics = results
        for metric_name, metric in objectives.items():
            self.experiment.log_metric(metric_name + '_' + train_val, metric,
                                       self._epoch)
            print(metric_name, ':', metric.item())
        if train_val == 'train':
            for grad_metric_name, grad_metric in grads.items():
                self.experiment.log_metric('grads_' + grad_metric_name,
                                           grad_metric, self._epoch)
        for param_name, param in params.items():
            self.experiment.log_metric(param_name + '_' + train_val, param,
                                       self._epoch)
        for image_name, imgs in images.items():
            self.plot_images(imgs, image_name, train_val)
        for metric_name, metric in metrics.items():
            self.experiment.log_metric(metric_name + '_' + train_val, metric,
                                       self._epoch)
        if train_val == 'val':
            self._epoch += 1

    def plot_images(self, images, title, train_val):
        """
        Plot a tensor of images.

        Args:
            images (torch.Tensor): a tensor of shape [steps, b, c, h, w]
            title (str): title for the images, e.g. reconstructions
            train_val (str): either 'train' or 'val'
        """
        # add a channel dimension if necessary
        if len(images.shape) == 4:
            s, b, h, w = images.shape
            images = images.view(s, b, 1, h, w)
        s, b, c, h, w = images.shape
        if b > 10:
            images = images[:, :10]
        # swap the steps and batch dimensions
        images = images.transpose(0, 1).contiguous()
        images = images.view(-1, c, h, w)
        # grid = make_grid(images.clamp(0, 1), nrow=s).numpy()
        grid = make_grid(images, nrow=s).numpy()
        if c == 1:
            grid = grid[0]
            cmap = 'gray'
        else:
            grid = np.transpose(grid, (1, 2, 0))
            cmap = None
        plt.imshow(grid, cmap=cmap)
        plt.axis('off')
        self.experiment.log_figure(figure=plt,
                                   figure_name=title + '_' + train_val)
        plt.close()

    def save(self, model):
        """
        Save the model weights in comet.

        Args:
            model (nn.Module): the model to be saved
        """
        if self._epoch % self.exp_config['checkpoint_interval'] == 0:
            print('Checkpointing the model...')
            state_dict = model.state_dict()
            cpu_state_dict = {k: v.cpu() for k, v in state_dict.items()}
            # save the state dictionary
            ckpt_path = os.path.join('./ckpt_epoch_' + str(self._epoch) +
                                     '.ckpt')
            torch.save(cpu_state_dict, ckpt_path)
            self.experiment.log_asset(ckpt_path)
            os.remove(ckpt_path)
            print('Done.')

    def load(self, model):
        """
        Load the model weights.
        """
        assert self.exp_config[
            'checkpoint_exp_key'] is not None, 'Checkpoint experiment key must be set.'
        print('Loading checkpoint from ' +
              self.exp_config['checkpoint_exp_key'] + '...')
        comet_api = comet_ml.papi.API(
            rest_api_key=self.exp_config['rest_api_key'])
        exp = comet_api.get_experiment(
            workspace=self.exp_config['comet_config']['workspace'],
            project_name=self.exp_config['comet_config']['project_name'],
            experiment=self.exp_config['checkpoint_exp_key'])
        # asset_list = comet_api.get_experiment_asset_list(self.exp_config['checkpoint_exp_key'])
        asset_list = exp.get_asset_list()
        # get most recent checkpoint
        ckpt_assets = [
            asset for asset in asset_list if 'ckpt' in asset['fileName']
        ]
        asset_times = [asset['createdAt'] for asset in ckpt_assets]
        asset = ckpt_assets[asset_times.index(max(asset_times))]
        print('Checkpoint Name:', asset['fileName'])
        ckpt = exp.get_asset(asset['assetId'])
        state_dict = torch.load(io.BytesIO(ckpt))
        model.load(state_dict)
        print('Done.')
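A sketch of how this Logger might be constructed (the config keys below are assumptions; 'comet_config' must hold valid Experiment constructor kwargs, and 'checkpoint_interval', 'checkpoint_exp_key', and 'rest_api_key' are read by save() and load()):

exp_config = {
    'comet_config': {'api_key': 'API_KEY',
                     'project_name': 'demo-project',
                     'workspace': 'demo-workspace'},
    'checkpoint_interval': 10,
    'checkpoint_exp_key': None,
    'rest_api_key': 'REST_API_KEY',
}
logger = Logger(exp_config, model_config={'hidden_size': 128}, data_config={'batch_size': 32})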
Example 7
class Trainer2D:
    def __init__(self, config):
        self.experiment = Experiment(api_key="CQ4yEzhJorcxul2hHE5gxVNGu",
                                     project_name="HIP")
        self.experiment.log_parameters(vars(config))
        self.config = config
        self.log_step = config.log_step
        self.model = conv2d.Conv2DPatches(image_size=config.image_size)
        print(self.model)
        self.d = get_dataloader2D(config)
        self.train_loader, self.test_loader = self.d
        self.train_loader_jig, self.test_loader_jig = get_dataloader2DJigSaw(
            config)
        self.net_optimizer = optim.Adam(self.model.parameters(), config.lr,
                                        [0.5, 0.9999])
        if torch.cuda.is_available():
            print("Using CUDA")
            self.model = self.model.cuda()
        self.criterion_c = nn.CrossEntropyLoss()
        self.criterion_d = nn.MSELoss()
        self.epochs = config.epochs
        self.pre_model_path = "./artifacts/pre_models/" + str(
            config.lr) + ".pth"
        self.model_path = "./artifacts/models/" + str(config.lr) + ".pth"
        self.image_size = config.image_size

    def pre_train(self):

        if os.path.isfile(self.pre_model_path):
            print("Using pre-trained model for solving the jigsaw puzzle")
            self.model = torch.load(self.pre_model_path)
        else:
            print("Starting pre-training and solving the jigsaw puzzle")
            # NOTE: range(0) never iterates, so this pre-training loop is effectively disabled
            for epoch in range(0):
                print("Starting epoch {}".format(epoch))
                train_loader = iter(self.train_loader_jig)
                with self.experiment.train():
                    for i in range(len(train_loader)):
                        self.net_optimizer.zero_grad()
                        data, indexes, _ = next(train_loader)
                        # print(landmarks)
                        # print(landmarks.shape)
                        data, indexes = self.to_var(data), self.to_var(
                            indexes).float()
                        B, L, H, W = data.size()
                        B, L, S = indexes.size()
                        print(data.size())
                        print(indexes.size())

                        jig_out, _ = self.model(data, True)
                        loss = self.criterion_d(jig_out, indexes.view(-1, S))
                        loss.backward()
                        self.net_optimizer.step()
                        # self.plots(y_slices, landmarks[:, :, [0, 2]], detected_points)
                        self.experiment.log_metric("pre-loss", loss.item())
                        print("loss: {}".format(loss.item()))

            torch.save(self.model, self.pre_model_path)

    def train(self):
        if os.path.isfile(self.model_path):
            print("Using pre-trained model")
            self.model = torch.load(self.model_path)
        # NOTE: dead `if False` branch kept from the original; the else-block always runs
        if False:
            pass
        else:
            print("Starting training")
            if torch.cuda.is_available():
                self.model = self.model.cuda()
            for epoch in range(self.epochs):
                print("Starting epoch {}".format(epoch))
                train_loader = iter(self.train_loader)
                with self.experiment.train():
                    for i in range(len(train_loader)):
                        self.net_optimizer.zero_grad()
                        data, landmarks, _ = next(train_loader)
                        # print(landmarks)
                        data, landmarks = self.to_var(data), self.to_var(
                            landmarks)
                        B, L, H, W = data.size()
                        B, L, S = landmarks.size()
                        y = landmarks[:, :, 1].view(B, L)
                        y_slices = torch.zeros([B, L, H, W],
                                               dtype=torch.float32)
                        if torch.cuda.is_available():
                            y_slices = y_slices.cuda()
                        for b in range(B):  # avoid shadowing the outer loop variable
                            y_slices[b] = data[b, y[b]]

                        jig_out, detected_points = self.model(y_slices)
                        landmarks = landmarks.float() / self.image_size
                        loss = self.criterion_d(detected_points,
                                                landmarks[:, :, [0, 2]])
                        loss.backward()
                        self.net_optimizer.step()
                        # self.plots(y_slices, landmarks[:, :, [0, 2]], detected_points)
                        self.experiment.log_metric("loss", loss.item())
                        print("loss: {}".format(loss.item()))
                if epoch % self.log_step == 0:
                    with self.experiment.test():
                        self.evaluate()
                        evaluator = Evaluator(self, self.test_loader)
                        evaluator.report()
            torch.save(self.model, self.model_path)
        evaluator = Evaluator(self, self.test_loader)
        evaluator.report()

    def evaluate(self):
        test_loader = iter(self.test_loader)
        with self.experiment.test():
            loss = 0
            for i in range(len(test_loader)):
                self.net_optimizer.zero_grad()
                data, landmarks, _ = next(test_loader)
                data, landmarks = self.to_var(data), self.to_var(landmarks)
                B, L, H, W = data.size()
                B, L, S = landmarks.size()
                y = landmarks[:, :, 1].view(B, L)
                y_slices = torch.zeros([B, L, H, W], dtype=torch.float32)
                if torch.cuda.is_available():
                    y_slices = y_slices.cuda()

                for b in range(B):  # avoid shadowing the outer loop variable
                    y_slices[b] = data[b, y[b]]

                jig_out, detected_points = self.model(y_slices)
                landmarks = landmarks.float() / self.image_size
                loss += self.criterion_d(detected_points,
                                         landmarks[:, :, [0, 2]]).item()
                self.plots(y_slices.cpu(), landmarks[:, :, [0, 2]],
                           detected_points)
            self.experiment.log_metric("loss", loss / len(test_loader))

    def plots(self, slices, real, predicted):
        figure, axes = plt.subplots(nrows=4, ncols=4, figsize=(15, 15))
        slices = slices[0].cpu().detach().numpy()
        real = real[0].cpu().detach().numpy()
        predicted = predicted[0].cpu().detach().numpy()
        real *= self.image_size
        predicted *= self.image_size
        s = 0
        # print(real.size())
        # print(predicted.size())
        for i in range(4):
            for j in range(4):
                axes[i, j].imshow(slices[s])
                x, z = real[s]
                axes[i, j].scatter(x, z, color="red")
                x, z = predicted[s]
                axes[i, j].scatter(x, z, color="blue")
                s += 1
        self.experiment.log_figure(figure=plt)
        plt.savefig("artifacts/predictions/img.png")
        plt.show()

    def to_var(self, x):
        """Converts numpy to variable."""
        if torch.cuda.is_available():
            x = x.cuda()
        return Variable(x, requires_grad=False)

    def to_data(self, x):
        """Converts variable to numpy."""
        if torch.cuda.is_available():
            x = x.cpu()
        return x.data.numpy()

    def predict(self, x):
        if torch.cuda.is_available():
            self.model = self.model.cuda()
            x = x.cuda()
        _, x = self.model(x)
        return x
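torch.autograd.Variable has been a no-op wrapper since PyTorch 0.4, so on modern versions to_var can be reduced to a plain device move (a sketch, not the author's code):

import torch

def to_var_modern(x: torch.Tensor) -> torch.Tensor:
    # Variable wrapping is no longer needed; inputs default to requires_grad=False
    return x.cuda() if torch.cuda.is_available() else x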
Example 8
class Dashboard:
    """Record training/evaluation statistics to comet
    :param Path log_dir
    :param list taskid_to_name
    """
    def __init__(self, config, paras, log_dir, train_type, resume=False):
        self.log_dir = log_dir
        self.expkey_f = Path(self.log_dir, 'exp_key')
        self.global_step = 1

        if resume:
            assert self.expkey_f.exists(), f"Cannot find comet exp key in {self.log_dir}"
            with open(self.expkey_f, 'r') as f:
                exp_key = f.read().strip()
            self.exp = ExistingExperiment(
                previous_experiment=exp_key,
                project_name=COMET_PROJECT_NAME,
                workspace=COMET_WORKSPACE,
                auto_output_logging=None,
                auto_metric_logging=None,
                display_summary_level=0,
            )
        else:
            self.exp = Experiment(
                project_name=COMET_PROJECT_NAME,
                workspace=COMET_WORKSPACE,
                auto_output_logging=None,
                auto_metric_logging=None,
                display_summary_level=0,
            )
            #TODO: is there a better way to do this?
            with open(self.expkey_f, 'w') as f:
                print(self.exp.get_key(), file=f)

            self.exp.log_other('seed', paras.seed)
            self.log_config(config)
            if train_type == 'evaluation':
                if paras.pretrain:
                    self.exp.set_name(
                        f"{paras.pretrain_suffix}-{paras.eval_suffix}")
                    self.exp.add_tags([
                        paras.pretrain_suffix, config['solver']['setting'],
                        paras.accent, paras.algo, paras.eval_suffix
                    ])
                    if paras.pretrain_model_path:
                        self.exp.log_other("pretrain-model-path",
                                           paras.pretrain_model_path)
                    else:
                        self.exp.log_other("pretrain-runs",
                                           paras.pretrain_runs)
                        self.exp.log_other("pretrain-setting",
                                           paras.pretrain_setting)
                        self.exp.log_other("pretrain-tgt-accent",
                                           paras.pretrain_tgt_accent)
                else:
                    self.exp.set_name(paras.eval_suffix)
                    self.exp.add_tags(
                        ["mono", config['solver']['setting'], paras.accent])
            else:
                self.exp.set_name(paras.pretrain_suffix)
                self.exp.log_others({
                    f"accent{i}": k
                    for i, k in enumerate(paras.pretrain_accents)
                })
                self.exp.log_other('accent', paras.tgt_accent)
                self.exp.add_tags([
                    paras.algo, config['solver']['setting'], paras.tgt_accent
                ])
            #TODO: Need to add pretrain setting

        ##slurm-related
        hostname = os.uname()[1]
        if len(hostname.split('.')) == 2 and hostname.split(
                '.')[1] == 'speech':
            logger.notice(f"Running on Battleship {hostname}")
            self.exp.log_other('jobid', int(os.getenv('SLURM_JOBID')))
        else:
            logger.notice(f"Running on {hostname}")

    def log_config(self, config):
        #NOTE: depth at most 2
        for block in config:
            for n, p in config[block].items():
                if isinstance(p, dict):
                    self.exp.log_parameters(p, prefix=f'{block}-{n}')
                else:
                    self.exp.log_parameter(f'{block}-{n}', p)

    def set_status(self, status):
        self.exp.log_other('status', status)

    def step(self, n=1):
        self.global_step += n

    def set_step(self, global_step=1):
        self.global_step = global_step

    def log_info(self, prefix, info):
        self.exp.log_metrics({k: float(v)
                              for k, v in info.items()},
                             prefix=prefix,
                             step=self.global_step)

    def log_other(self, name, value):
        self.exp.log_metric(name, value, step=self.global_step)

    def log_step(self):
        self.exp.log_other('step', self.global_step)

    def add_figure(self, fig_name, data):
        self.exp.log_figure(figure_name=fig_name,
                            figure=data,
                            step=self.global_step)

    def check(self):
        if not self.exp.alive:
            logger.warning("Comet logging stopped")
Example 9
distrfracs = np.array(distrfracs)
order = np.argsort(distrfracs)

# apply sort order
distrfracs = distrfracs[order]
testlasts = np.array(testlasts)[order]
trainlasts = np.array(trainlasts)[order]
radmeans = np.array(radmeans)[order]

# plot train and test accuracy
plt.fill_between(np.log10(distrfracs),
                 trainlasts.min(axis=1) - 5e-3,
                 trainlasts.max(axis=1) + 0e-3,
                 label='train')
plt.fill_between(np.log10(distrfracs),
                 testlasts.min(axis=1) - 0e-3,
                 testlasts.max(axis=1) + 5e-3,
                 label='test')
plt.xlabel('poison fraction (log)')
plt.ylabel('accuracy')
plt.legend(loc='lower left', frameon=False)
print(experiment.log_figure()['web'])

# plot volume (HWHM)
plt.clf()
plt.fill_between(np.log10(distrfracs), radmeans.min(axis=1),
                 radmeans.max(axis=1))
plt.xlabel('poison fraction (log)')
plt.ylabel('half-width-half-min')
print(experiment.log_figure()['web'])
Example 10
for i in range(hyper_params["N_OBS"]):
    pval_selected = pval_sort[i]
    index_original = int(numpy.where(pval_selected == pval)[0])
    if index_original in idx_noisy:
        index.append(True)
    else:
        index.append(False)

x_line = numpy.arange(0, hyper_params["N_OBS"], step=1)
y_line = numpy.linspace(0, 1, hyper_params["N_OBS"])
y_adj = numpy.arange(
    0, hyper_params["N_OBS"], step=1
) / hyper_params["N_OBS"] * 0.05  # 0.05 is the alpha level of the test
zoom = 40  # nb of points to zoom

fig, (ax1, ax2) = plt.subplots(2, 1)

ax1.scatter(numpy.arange(0, len(pval), 1), pval_sort, c=index)
ax1.plot(x_line, y_line, color="green")
ax1.plot(x_line, y_adj, color="red")
ax1.set_title('Entire dataset')
ax1.set_xticklabels([])

ax2.scatter(numpy.arange(0, zoom, 1), pval_sort[0:zoom], c=index[0:zoom])
ax2.plot(x_line[0:zoom], y_line[0:zoom], color="green")
ax2.plot(x_line[0:zoom], y_adj[0:zoom], color="red")
ax2.set_title('Zoomed in')
ax2.set_xticklabels([])

experiment.log_figure(figure_name="chi2_test", figure=fig, overwrite=True)
plt.show()
Example 11
    if train_acc > best_train_acc:
        best_train_acc = train_acc
    if test_acc > best_test_acc:
        best_test_acc = test_acc

    experiment.log_metric("best_train_acc", best_train_acc, epoch=epoch + 1)
    experiment.log_metric("best_test_acc", best_test_acc, epoch=epoch + 1)
    experiment.log_metric("train_acc", train_acc, epoch=epoch + 1)
    experiment.log_metric("test_acc", test_acc, epoch=epoch + 1)

    plt.plot(training_loss_list, color='blue', label='Training')
    plt.plot(testing_loss_list, color='red', label='Testing', alpha=.5)
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title('Loss plot')
    plt.legend()
    plt.savefig("./loss_plot_" + model_name + ".png", format='png')
    experiment.log_figure(figure=plt, figure_name='loss_plot', overwrite=True)
    plt.close()

    plt.plot(training_acc_list, color='blue', label='Training')
    plt.plot(testing_acc_list, color='red', label='Testing', alpha=.5)
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.title('Accuracy plot')
    plt.legend()
    plt.savefig("./accuracy_plot_" + model_name + ".png", format='png')
    experiment.log_figure(figure=plt,
                          figure_name='accuracy_plot',
                          overwrite=True)
    plt.close()
Example 12
melted = merged_df.melt(
    id_vars=['AID', 'Classifier', 'Iteration Number', 'Embedding', 'test_train', 'Percent_lib_scanned'],
    var_name='Metric', value_name='Score')
# .copy() avoids SettingWithCopyWarning on the Score casts below
mcc_auc_plot = melted[(melted['Metric'].isin(['auc', 'mcc'])) &
                      (melted['test_train'] == 'test')].copy()
active_find_plot = melted[(melted['Metric'].isin(['Percent Active Found', 'Percent Batch Pred Active'])) &
                          (melted['test_train'] == 'test')].copy()
prec_rec_plot = melted[(melted['Metric'].isin(['prec_Active', 'rec_Active'])) &
                       (melted['test_train'] == 'test')].copy()
mcc_auc_plot.Score = mcc_auc_plot.Score.astype(float)
active_find_plot.Score = active_find_plot.Score.astype(float)
prec_rec_plot.Score = prec_rec_plot.Score.astype(float)

#%%
'''Plot 3 figures: AUC/MCC,%found/totalfound,recall/precision @0.5'''
g = sns.relplot(x="Iteration Number", y="Score", hue='Classifier',style="Metric", col="AID", col_wrap=3, data=mcc_auc_plot,kind='line',legend='full',markers= True,ci = None )
exp.log_figure()
g = sns.relplot(x="Iteration Number", y="Score", hue='Classifier',style="Metric", col="AID", col_wrap=3, data=active_find_plot,kind='line',legend='full',markers= True,ci = None )
exp.log_figure()
g = sns.relplot(x="Iteration Number", y="Score", hue='Classifier',style="Metric", col="AID", col_wrap=3, data=prec_rec_plot,kind='line',legend='full',markers= True,ci = None )
exp.log_figure()
g = sns.relplot(x="Percent_lib_scanned", y="Score", hue='Classifier',style="Metric", col="AID", col_wrap=3, data=active_find_plot,kind='line',legend='full',markers= True,ci = None )
exp.log_figure()
'''Plot prec/recall curves for all points '''
df = merged_df[merged_df['test_train']=='test']
# Initialize the figure
fig, axs = plt.subplots(9, 10)
plt.style.use('seaborn-darkgrid')

# create an aid dict
AID_list =['AID_1345083','AID_624255','AID_449739','AID_995','AID_938','AID_628','AID_596','AID_893','AID_894']
Example 13
ax1.plot(x_line, y_adj, color="red")
ax1.set_title(
    f'Entire test dataset with {int(hyper_params["TEST_NOISE"] * 100)}% of noise'
)
ax1.set_xticklabels([])

ax2.scatter(numpy.arange(0, zoom, 1),
            pval[pval_order][0:zoom],
            c=index[pval_order].reshape(-1)[0:zoom])
ax2.plot(x_line[0:zoom], y_line[0:zoom], color="green")
ax2.plot(x_line[0:zoom], y_adj[0:zoom], color="red")
ax2.set_title('Zoomed in')
ax2.set_xticklabels([])

experiment.log_figure(figure_name="empirical_test_hypothesis",
                      figure=fig,
                      overwrite=True)
plt.show()

# Compute some stats
precision, recall = test_performances(pval, index, hyper_params["ALPHA"])
print(f"Precision: {precision}")
print(f"Recall: {recall}")
experiment.log_metric("precision", precision)
experiment.log_metric("recall", recall)

# Show some examples
fig, axs = plt.subplots(5, 5)
fig.tight_layout()
axs = axs.ravel()
Example 14
def comet_Fold(save_path, embedding_type, model_type, bin_labels):
    from comet_ml import Experiment
    exp = Experiment(api_key="sqMrI9jc8kzJYobRXRuptF5Tj",
                     project_name="80_10_baseline",
                     workspace="gdreiman1",
                     disabled=False)
    exp.log_code = True
    import warnings
    warnings.filterwarnings('ignore')
    import pickle
    import pandas as pd
    import numpy as np
    import sklearn as sklearn
    from sklearn.metrics import precision_recall_fscore_support as prf
    from sklearn.linear_model import SGDClassifier
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.preprocessing import StandardScaler, LabelEncoder
    import matplotlib.pyplot as plt
    import seaborn as sns
    '''Comet Saving Zone'''
    def comet_additional_info(exp, save_path, metrics_dict, X_test, y_test,
                              embedding_type, model_type):
        #get AID number
        import ntpath
        #get base file name
        folder, base = ntpath.split(save_path)
        #split the file name at the last '_' (assumes files saved as AID_xxx_endinfo.pkl)
        AID, _, end_info = base.rpartition('_')
        exp.add_tag(AID)
        #save data location, AID info, and version info
        exp.log_dataset_info(name=AID, version=end_info, path=save_path)
        #save some informative tags:
        tags = [AID, end_info, model_type]
        exp.add_tags(tags)
        exp.add_tag(embedding_type)
        #save metrics_dict in the data folder, tagged with the comet experiment key
        exp_num = exp.get_key()
        model_save = ntpath.join(folder, model_type + '_' + exp_num + 'metrics_dict.pkl')
        pickle_on = open(model_save, 'wb')
        pickle.dump(metrics_dict, pickle_on)
        pickle_on.close()
        #log trained model location
        exp.log_other('Metrics Dict Path', model_save)
        #tell comet that the experiment is over
        exp.end()

    def get_Scaled_Data(train_ind, test_ind, X_mfp, activity_table, labels,
                        bin_labels):
        #get start and end index for molchars
        MC_start = activity_table.columns.get_loc('Chi0')
        #need to add 1 bc exclusive indexing
        MC_end = activity_table.columns.get_loc('VSA_EState9') + 1
        # standardize data
        scaler = StandardScaler(copy=False)
        #return requested datatype
        if embedding_type == 'MFPMolChars':
            X_train_molchars_std = scaler.fit_transform(
                np.array(activity_table.iloc[train_ind,
                                             MC_start:MC_end]).astype(float))
            X_test_molchars_std = scaler.transform(
                np.array(activity_table.iloc[test_ind,
                                             MC_start:MC_end]).astype(float))
            X_train = np.concatenate(
                (X_mfp[train_ind, :], X_train_molchars_std), axis=1)
            X_test = np.concatenate((X_mfp[test_ind, :], X_test_molchars_std),
                                    axis=1)
        elif embedding_type == 'MFP':
            X_train = X_mfp[train_ind, :]
            X_test = X_mfp[test_ind, :]
        elif embedding_type == 'MolChars':
            X_train_molchars_std = scaler.fit_transform(
                np.array(activity_table.iloc[train_ind,
                                             MC_start:MC_end]).astype(float))
            X_test_molchars_std = scaler.transform(
                np.array(activity_table.iloc[test_ind,
                                             MC_start:MC_end]).astype(float))
            X_train = X_train_molchars_std
            X_test = X_test_molchars_std
        y_train = labels[train_ind]
        y_test = labels[test_ind]
        #remapping active to 1 and everything else to zero
        bin_y_train, bin_y_test = np.array([
            1 if x == 0 else 0 for x in y_train
        ]), np.array([1 if x == 0 else 0 for x in y_test])
        if bin_labels:
            y_test = bin_y_test
            y_train = bin_y_train
        return X_train, X_test, y_train, y_test

    def train_SVM(X_train, X_test, y_train, y_test, split_ID):
        sgd_linear_SVM = SGDClassifier(loss='hinge',
                                       penalty='l2',
                                       alpha=0.0001,
                                       l1_ratio=0.15,
                                       fit_intercept=True,
                                       max_iter=500000,
                                       tol=0.001,
                                       shuffle=True,
                                       verbose=0,
                                       epsilon=0.1,
                                       n_jobs=-1,
                                       random_state=None,
                                       learning_rate='optimal',
                                       eta0=0.0,
                                       power_t=0.5,
                                       early_stopping=False,
                                       validation_fraction=0.1,
                                       n_iter_no_change=5,
                                       class_weight='balanced',
                                       warm_start=False,
                                       average=False)
        sgd_linear_SVM_model = sgd_linear_SVM.fit(X_train, y_train)

        sgd_lSVM_preds = sgd_linear_SVM_model.predict(X_test)
        prec, rec, f_1, supp = prf(y_test, sgd_lSVM_preds, average=None)
        class_rep = sklearn.metrics.classification_report(
            y_test, sgd_lSVM_preds)
        exp.log_other('Classification Report' + split_ID, class_rep)
        mcc = sklearn.metrics.matthews_corrcoef(y_test, sgd_lSVM_preds)

        #if first iteration, report model parameters to comet
        if split_ID == '0':
            exp.log_parameters(sgd_linear_SVM_model.get_params())
        return prec, rec, f_1, supp, mcc

    def train_RF(X_train, X_test, y_train, y_test, split_ID):

        rf = RandomForestClassifier(n_estimators=100,
                                    random_state=2562,
                                    class_weight="balanced_subsample",
                                    n_jobs=-1)
        rand_for = rf.fit(X_train, y_train)
        rf_preds = rand_for.predict(X_test)
        prec, rec, f_1, supp = prf(y_test, rf_preds, average=None)
        class_rep = sklearn.metrics.classification_report(y_test, rf_preds)
        exp.log_other('Classification Report' + split_ID, class_rep)
        mcc = sklearn.metrics.matthews_corrcoef(y_test, rf_preds)

        #if first iteration, report model parameters to comet
        if split_ID == '0':
            exp.log_parameters(rand_for.get_params())
        return prec, rec, f_1, supp, mcc

    def train_LGBM(X_train, X_test, y_train, y_test, split_ID):
        import lightgbm as lgb
        #make model class
        lgbm_model = lgb.LGBMClassifier(boosting_type='gbdt',
                                        num_leaves=31,
                                        max_depth=-1,
                                        learning_rate=0.1,
                                        n_estimators=500,
                                        subsample_for_bin=200000,
                                        objective='binary',
                                        is_unbalance=True,
                                        min_split_gain=0.0,
                                        min_child_weight=0.001,
                                        min_child_samples=20,
                                        subsample=1.0,
                                        subsample_freq=0,
                                        colsample_bytree=1.0,
                                        reg_alpha=0.0,
                                        reg_lambda=0.0,
                                        random_state=None,
                                        n_jobs=-1,
                                        silent=True,
                                        importance_type='split')
        #train model
        lgbm = lgbm_model.fit(X_train, y_train)
        lgbm_preds = lgbm.predict(X_test)
        prec, rec, f_1, supp = prf(y_test, lgbm_preds, average=None)
        class_rep = sklearn.metrics.classification_report(y_test, lgbm_preds)
        exp.log_other('Classification Report' + split_ID, class_rep)
        mcc = sklearn.metrics.matthews_corrcoef(y_test, lgbm_preds)

        #if first iteration, report model parameters to comet
        if split_ID == '0':
            exp.log_parameters(lgbm.get_params())
        return prec, rec, f_1, supp, mcc

    #from https://stackoverflow.com/questions/6027558/flatten-nested-dictionaries-compressing-keys
    import collections.abc

    def flatten(d, parent_key='', sep='_'):
        items = []
        for k, v in d.items():
            new_key = parent_key + sep + k if parent_key else k
            if isinstance(v, collections.abc.MutableMapping):  # collections.MutableMapping was removed in Python 3.10
                items.extend(flatten(v, new_key, sep=sep).items())
            else:
                items.append((new_key, v))
        return dict(items)
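    # e.g. flatten({'a': {'b': 1}, 'c': 2}) -> {'a_b': 1, 'c': 2}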

    #get data cleaned
    pickle_off = open(save_path, 'rb')
    activity_table = pickle.load(pickle_off)
    pickle_off.close()
    #get length of MFP
    fp_length = len(activity_table.iloc[5]['MFP'])
    #reshape mfp
    X_mfp = np.concatenate(np.array(activity_table['MFP'])).ravel()
    X_mfp = X_mfp.reshape((-1, fp_length))
    le = LabelEncoder()
    labels = le.fit_transform(activity_table['PUBCHEM_ACTIVITY_OUTCOME'])
    #split data:
    from sklearn.model_selection import StratifiedShuffleSplit
    #this is outer 5fold cross validation i.e. 80/20 split
    big_splitter = StratifiedShuffleSplit(n_splits=5,
                                          test_size=0.2,
                                          random_state=2562)
    #inner splitter replicating a start with 10% of the data (12.5% of the 80% initial split)
    little_splitter = StratifiedShuffleSplit(n_splits=8,
                                             test_size=0.2,
                                             train_size=0.125,
                                             random_state=2562)
    #this holds all the metrics values that will be stored in comet
    metric_dict = {}
    metric_names = [
        'prec_Inactive', 'prec_Active', 'rec_Inactive', 'rec_Active',
        'f_1_Inactive', 'f_1_Active', 'supp_Inactive', 'supp_Active', 'mcc'
    ]

    def calc_and_save_metrics(X_train, X_test, y_train, y_test, split_index,
                              metric_names, metric_dict_list, map_name):
        '''Takes in test and train data + labels, computes metrics and saves them
        as a dict inside of the provided list. Returns this list.'''
        prec, rec, f_1, supp, mcc = classifier_train(X_train, X_test, y_train,
                                                     y_test, split_index)
        results_array = np.concatenate((prec, rec, f_1, supp)).tolist() + [mcc]
        metric_dict_list.append(
            dict(
                zip(['ID', 'Split Info'] + metric_names,
                    [split_index, map_name] + results_array)))
        return metric_dict_list

    #determine model type
    classifier_dict = {'SVM': train_SVM, 'RF': train_RF, 'LGBM': train_LGBM}
    #set dummy variable to func that trains specified model
    classifier_train = classifier_dict[model_type]
    metric_dict_list = []
    #using labels as a dummy for X
    for split_num, [train_ind,
                    test_ind] in enumerate(big_splitter.split(labels, labels)):
        #indexes which split the data comes from, X.X i.e. big.little
        split_index = str(split_num)
        map_name = 'Split' + split_index + ' 80% train'
        #get test/train index
        X_train, X_test, y_train, y_test = get_Scaled_Data(
            train_ind, test_ind, X_mfp, activity_table, labels, bin_labels)
        #train model and get back classwise metrics
        metric_dict_list = calc_and_save_metrics(X_train, X_test, y_train,
                                                 y_test, split_index,
                                                 metric_names,
                                                 metric_dict_list, map_name)
        #add split_index to metric names this assumes 0 = inactive 1 = active!!

        for little_split_num, [little_train_ind, little_test_ind] in enumerate(
                little_splitter.split(labels[train_ind], labels[train_ind])):
            split_index = str(split_num) + '.' + str(little_split_num)
            #get test/train index
            #map little-split indices (relative to the 80% train subset) back to the full table
            X_train, X_test, y_train, y_test = get_Scaled_Data(
                train_ind[little_train_ind], test_ind, X_mfp, activity_table,
                labels, bin_labels)
            map_name = 'Split' + str(split_num) + ' 10% train'
            #train model and get back classwise metrics
            #check if train_split contains both postive and negative labels
            if len(set(y_train)) == 2:
                metric_dict_list = calc_and_save_metrics(
                    X_train, X_test, y_train, y_test, split_index,
                    metric_names, metric_dict_list, map_name)

    # now convert metric_dict_list to df:
    metrics_df = pd.DataFrame(metric_dict_list)
    #set Split_ID to index
    metrics_df.set_index('ID', drop=True, inplace=True)
    #now plot all the columns
    #first make a new df column to ID things as either split
    cols_to_plot = list(metrics_df.columns.values)[1:]
    #turn off plotting
    plt.ioff()
    for metric in cols_to_plot:
        #make sns boxplot
        ax = sns.boxplot(x='Split Info', y=metric, data=metrics_df)
        ax.set_xticklabels(ax.get_xticklabels(), rotation=30)
        plt.tight_layout()
        #log the plot
        exp.log_figure()
    ''' now we're going to go through and calculate means and stds for 3 diff groups
        1) the 5 80% train runs
        2) the 5 sets of 8 10% runs
        3) the 40 total 10% runs
        we save each in a list as a pd Series with a name explaining the contents'''
    avg_std_list = []
    #get 5 80% mean and std
    avg_std_list.append(
        (metrics_df[~metrics_df.index.str.contains(r'\.')].mean(
            axis=0, numeric_only=True)).rename('50_80_mean'))
    avg_std_list.append((metrics_df[~metrics_df.index.str.contains(r'\.')].std(
        axis=0, numeric_only=True)).rename('50_80_std'))
    #get 40 10% mean and std
    avg_std_list.append((metrics_df[metrics_df.index.str.contains(r'\.')].mean(
        axis=0, numeric_only=True)).rename('40_10_mean'))
    avg_std_list.append((metrics_df[metrics_df.index.str.contains(r'\.')].std(
        axis=0, numeric_only=True)).rename('40_10_std'))
    #get the splitwise 10% mean and std
    for split_num in range(5):
        id_split = str(split_num) + r'\.'
        avg_std_list.append(
            (metrics_df[metrics_df.index.str.contains(id_split)].mean(
                axis=0,
                numeric_only=True)).rename(str(split_num) + '_10_mean'))
        avg_std_list.append(
            (metrics_df[metrics_df.index.str.contains(id_split)].std(
                axis=0, numeric_only=True)).rename(str(split_num) + '_10_std'))
    #now add list of dicts of averages to metrics df
    metrics_df = pd.concat([metrics_df, pd.DataFrame(avg_std_list)])
    #convert metrics_df to metric dict and log it
    metric_dict = metrics_df.to_dict('index')
    exp.log_metrics(flatten(metric_dict))
    #save metric_df to current folder
    comet_additional_info(exp, save_path, metrics_df, X_test, y_test,
                          embedding_type, model_type)
Example 15
def train_cifar10(batch_size: int,
                  learning_rate: float,
                  epochs: int,
                  experiment: Experiment,
                  model: Sequential = get_model(),  # note: this default is built once, when the module is imported
                  initial_epoch: int = 0,
                  training_datagen: ImageDataGenerator = ImageDataGenerator(),
                  scheduler: Callable[[int], float] = None,
                  early_stopping_th: Optional[int] = 250,
                  data_portion: float = 1.0,
                  find_lr: bool = False) -> None:
    preprocessing_fnc = training_datagen.preprocessing_function
    name = experiment.get_key()
    log_path, model_path = get_output_paths(name)
    data = get_cifar10_data(data_portion=data_portion)

    training_datagen.fit(data.x_train)
    log_images(data.x_train, training_datagen, experiment)
    log_input_images(data.x_train, data.y_train, training_datagen, experiment)

    opt = Adam(lr=learning_rate)
    model.compile(loss='categorical_crossentropy',
                  optimizer=opt,
                  metrics=['accuracy'])

    log_model_plot(experiment, model)

    csv_cb = CSVLogger(log_path)
    keep_best_cb = KeepBest('val_acc')
    callbacks = [csv_cb,
                 keep_best_cb]  # [csv_cb, early_stopping_cb, keep_best_cb]
    if early_stopping_th is not None:
        early_stopping_cb = EarlyStopping('val_acc',
                                          patience=early_stopping_th,
                                          restore_best_weights=True,
                                          verbose=2)
        callbacks.append(early_stopping_cb)
    if scheduler is not None:
        scheduler.experiment_log(experiment=experiment,
                                 epochs=list(range(epochs)))
        callbacks.append(LearningRateScheduler(scheduler))
    if find_lr:
        lrf = LearningRateFinder(model=model)
        lrf.lrMult = (10e-1 / learning_rate)**(
            1.0 / (epochs * len(data.x_train) / batch_size))
        callbacks = [
            LambdaCallback(
                on_batch_end=lambda batch, logs: lrf.on_batch_end(batch, logs))
        ]

    model.fit_generator(training_datagen.flow(data.x_train,
                                              data.y_train,
                                              batch_size=batch_size),
                        steps_per_epoch=len(data.x_train) / batch_size,
                        epochs=epochs,
                        validation_data=(preprocessing_fnc(data.x_dev),
                                         data.y_dev),
                        shuffle=True,
                        callbacks=callbacks,
                        verbose=2,
                        initial_epoch=initial_epoch)
    model.save(model_path)
    experiment.log_asset(model_path)
    experiment.log_asset(log_path)

    if find_lr:
        experiment.log_figure('lr vs acc', lrf.plot_loss())

    log_final_metrics(experiment, model, data, preprocessing_fnc)
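Note that scheduler is annotated Callable[[int], float] but the body also calls scheduler.experiment_log(...), so a compatible object is a callable with that extra method. A hedged sketch of such a schedule (the class and its fields are assumptions; only the experiment_log call signature is taken from the code above):

from comet_ml import Experiment

class StepDecaySchedule:
    """Callable LR schedule that can also log its planned values to Comet."""

    def __init__(self, base_lr: float, drop: float = 0.5, every: int = 20):
        self.base_lr, self.drop, self.every = base_lr, drop, every

    def __call__(self, epoch: int) -> float:
        return self.base_lr * (self.drop ** (epoch // self.every))

    def experiment_log(self, experiment: Experiment, epochs) -> None:
        # pre-log the schedule so the LR curve is visible in the Comet UI
        for epoch in epochs:
            experiment.log_metric('scheduled_lr', self(epoch), step=epoch)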
Example 16
    def run_exp(self):
        ''' Run the experiment on given number of pilots. '''
        
        # Pre-train
        pretrain_weights_filename  = '{}{}.h5'.format(self.weights_savename_prefix, '_all')
        if self.pretraining:
            print('Pretraining...')                                                                                                                 
            _ = self.pretrain(pretrain_weights_filename, self.patience)
                
        for pilot_idx in range(1, self.n_pilots + 1):
            if self.log_pilots:
                experiment = Experiment(api_key='cSZq9kuH2I87ezvm2dEWTx6op', project_name='pilot{}'.format(pilot_idx), log_code=False, auto_param_logging=False)
            else:
                experiment = None
            
            # Load pilot data
            X_train, y_train, X_valid, y_valid, X_test, y_test = self.load_data(pilot_idx, valid_ratio=0.2)
            
            # Construct model & load pre-trained weights if available
            weights_filename  = '{}{}.h5'.format(self.weights_savename_prefix, pilot_idx)
            self.model = self.build_model(load_weights=True if self.pretraining else False,
                                          weights_filename=pretrain_weights_filename)
            
            # Train
            print('First phase training - Pilot {}'.format(pilot_idx))
            hist, best_epoch = self.train(X_train, y_train, X_valid, y_valid,
                                          save_filename='{}{}.h5'.format(self.weights_savename_prefix, pilot_idx),
                                          patience=self.patience, experiment=experiment)
            self.histories.append(hist)
             
            # Test (before extra training)
            self.test(pilot_idx, weights_filename, X_train, y_train, X_test, y_test, False)
                                
            # Extra-train
            if self.train_after_es:
                hist_es = self.extra_train(pilot_idx, X_valid, y_valid, weights_filename, hist.history['loss'][best_epoch], self.max_after_es_epochs) # New
                self.histories_es.append(hist_es)
                
                # Test (after extra training)
                self.test(pilot_idx, weights_filename, X_train, y_train, X_test, y_test, True)
            
            if self.log_pilots:
                experiment.log_metrics({'Test accuracy' : self.test_score['accuracy'][-1]})

                # Get t-SNE from intermediary outputs
                layer_idxs = self.tsne_layer_idxs
                get_output_functions = [ K.function([self.model.layers[0].input], [self.model.layers[idx].output]) for idx in layer_idxs]
                
                # Training dataset
                layer_outputs = [ get_output([X_train])[0] for get_output in get_output_functions ]
                [ experiment.log_figure(figure_name='tsne_raw_train{}_layer{}'.format(pilot_idx, layer_idxs[idx]),
                                        figure=tsne_plot(layer_outputs[idx], y_train, 20, title="t-SNE - Pilot {} - Layer {} (train)".format(pilot_idx, layer_idxs[idx]))) 
                                        for idx in range(len(layer_idxs)) ]
                                        
                # Testing dataset
                layer_outputs = [ get_output([X_test])[0] for get_output in get_output_functions ]
                [ experiment.log_figure(figure_name='tsne_raw_test{}_layer{}'.format(pilot_idx, layer_idxs[idx]),
                                        figure=tsne_plot(layer_outputs[idx], y_test, 20, title="t-SNE - Pilot {} - Layer {} (test)".format(pilot_idx, layer_idxs[idx]))) 
                                        for idx in range(len(layer_idxs)) ]
                plt.close('all')
                experiment.end()
            
        # Export logs to Comet.ml
        if self.comet_log:
            self.log()
        
        return self.test_score
Example 17
class Logger:
    """
    Logs/plots results to comet.

    Args:
        exp_config (dict): experiment configuration hyperparameters
        model_config (dict): model configuration hyperparameters
        data_config (dict): data configuration hyperparameters
    """
    def __init__(self, exp_config, model_config, data_config):
        self.experiment = Experiment(**exp_config['comet_config'])
        self.experiment.disable_mp()
        self._log_hyper_params(exp_config, model_config, data_config)
        self._epoch = 0

    def _log_hyper_params(self, exp_config, model_config, data_config):
        """
        Log the hyper-parameters for the experiment.

        Args:
            exp_config (dict): experiment configuration hyperparameters
            model_config (dict): model configuration hyperparameters
            data_config (dict): data configuration hyperparameters
        """
        def flatten_arg_dict(arg_dict):
            flat_dict = {}
            for k, v in arg_dict.items():
                if isinstance(v, dict):
                    flat_v = flatten_arg_dict(v)
                    for kk, vv in flat_v.items():
                        flat_dict[k + '_' + kk] = vv
                else:
                    flat_dict[k] = v
            return flat_dict

        self.experiment.log_parameters(flatten_arg_dict(exp_config))
        self.experiment.log_parameters(flatten_arg_dict(model_config))
        self.experiment.log_parameters(flatten_arg_dict(data_config))

    def log(self, results, train_val):
        """
        Plot the results in comet.

        Args:
            results (tuple): (objectives, grads, params, images, metrics) dicts to plot
            train_val (str): either 'train' or 'val'
        """
        objectives, grads, params, images, metrics = results
        for metric_name, metric in objectives.items():
            self.experiment.log_metric(metric_name + '_' + train_val, metric,
                                       self._epoch)
            print(metric_name, ':', metric.item())
        if train_val == 'train':
            for grad_metric_name, grad_metric in grads.items():
                self.experiment.log_metric('grads_' + grad_metric_name,
                                           grad_metric, self._epoch)
        for param_name, param in params.items():
            self.experiment.log_metric(param_name + '_' + train_val, param,
                                       self._epoch)
        for image_name, imgs in images.items():
            self.plot_images(imgs, image_name, train_val)
        for metric_name, metric in metrics.items():
            self.experiment.log_metric(metric_name + '_' + train_val, metric,
                                       self._epoch)
        if train_val == 'val':
            self._epoch += 1

    def plot_images(self, images, title, train_val):
        """
        Plot a tensor of images.

        Args:
            images (torch.Tensor): a tensor of shape [steps, b, c, h, w]
            title (str): title for the images, e.g. reconstructions
            train_val (str): either 'train' or 'val'
        """
        # add a channel dimension if necessary
        if len(images.shape) == 4:
            s, b, h, w = images.shape
            images = images.view(s, b, 1, h, w)
        s, b, c, h, w = images.shape
        if b > 10:
            images = images[:, :10]
        # swap the steps and batch dimensions
        images = images.transpose(0, 1).contiguous()
        images = images.view(-1, c, h, w)
        grid = make_grid(images.clamp(0, 1), nrow=s).numpy()
        if c == 1:
            grid = grid[0]
            cmap = 'gray'
        else:
            grid = np.transpose(grid, (1, 2, 0))
            cmap = None
        plt.imshow(grid, cmap=cmap)
        self.experiment.log_figure(figure=plt,
                                   figure_name=title + '_' + train_val)
        plt.close()

    def save(self, model):
        """
        Save the model weights in comet.

        Args:
            model (nn.Module): the model to be saved
        """
        pass
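# Note: a minimal usage sketch for this Logger (hypothetical configs; 'YOUR_KEY'
# is a placeholder). flatten_arg_dict joins nested keys with underscores,
# e.g. {'encoder': {'layers': 4}} -> {'encoder_layers': 4}.
exp_config = {'comet_config': {'api_key': 'YOUR_KEY', 'project_name': 'demo'},
              'optimizer': {'lr': 1e-3}}
model_config = {'encoder': {'layers': 4, 'hidden': 256}}
data_config = {'dataset': 'mnist', 'batch_size': 128}
logger = Logger(exp_config, model_config, data_config)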
Ejemplo n.º 18
0
        # if i % 2000 == 0:    # print every 2000 mini-batches
        #     print('[%d, %5d] loss: %.3f' %
        #           (epoch, i, running_loss / 2000))
        #     running_loss = 0.0
        end_time = time.time()
        # print("Time", end_time-start_time)
    loss_per_epoch.append(np.mean(epoch_loss))
    model_scheduler.step(np.mean(epoch_loss))
    plt.plot(loss_per_epoch, color='red', label='Training')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title('Loss plot per epoch')
    plt.legend()
    plt.savefig("./loss_per_epoch_plot_baseline.png", format='png')
    experiment.log_figure(figure=plt,
                          figure_name='loss_per_epoch_plot_baseline',
                          overwrite=True)
    plt.close()

    if (epoch % 5 == 0):

        # test
        correct = 0
        total = 0
        test_loss = 0
        with torch.no_grad():
            for data in testloader:
                inputs, labels = data[0].to(
                    device, non_blocking=True), data[1].to(device,
                                                           non_blocking=True)
                outputs = net(inputs)
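                # assumed continuation (the snippet is cut off after the forward
                # pass); `criterion` is hypothetical, e.g. nn.CrossEntropyLoss():
                loss = criterion(outputs, labels)
                test_loss += loss.item()
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        accuracy = 100.0 * correct / total
        experiment.log_metric('test_accuracy', accuracy, step=epoch)
        experiment.log_metric('test_loss', test_loss / len(testloader), step=epoch)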
Ejemplo n.º 19
0
    os.makedirs('pickle', exist_ok=True); pickle.dump(dw1, open(join('pickle', args.ckpt), 'wb'))
  along = 'along_eigvec'
else:
  dw1 = evaluator.get_random_dir()
  along = 'along_random_'+str(args.seed)

# span
cfeed = args.span/2 * np.linspace(-1, 1, 30)
cfeed_enum = list(enumerate(cfeed)); random.shuffle(cfeed_enum) # shuffle order so we see plot shape sooner on comet

# loop over all points along surface direction
name = 'span_' + str(args.span) + '/' + basename(args.ckpt) + '/' + along # name of experiment
xent = np.zeros(len(cfeed))
weights = evaluator.get_weights()
for i, (idx, c) in enumerate(cfeed_enum):

  perturbedWeights = [w + c * d1 for w, d1 in zip(weights, dw1)]
  evaluator.assign_weights(perturbedWeights)
  xent[idx], acc, _ = evaluator.eval()
  experiment.log_metric(name, xent[idx], idx)
  print('progress:', i + 1, 'of', len(cfeed_enum), '| time:', time())

# save plot data and log the figure
xent = np.reshape(np.array(xent), cfeed.shape)
plt.plot(cfeed, xent)
experiment.log_figure(name)

unique = utils.timenow()
pickle.dump((cfeed, xent), open(unique, 'wb'))
experiment.log_asset(file_path=unique, file_name=name+'.pkl')
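# Note: evaluator.get_random_dir() is defined elsewhere. A sketch of one common
# choice for such 1-D loss-surface scans (an assumption, not the author's exact
# code): a random direction with one tensor per weight, rescaled to each
# weight's norm so all layers are perturbed on a comparable scale.
import numpy as np

def get_random_dir(weights):
    direction = []
    for w in weights:
        d = np.random.randn(*np.shape(w))
        d *= np.linalg.norm(w) / (np.linalg.norm(d) + 1e-10)
        direction.append(d)
    return direction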
Ejemplo n.º 20
0
def comet_lgbm(save_path):
    from comet_ml import Experiment
    # log_code is a constructor argument, not an attribute
    exp = Experiment(api_key="sqMrI9jc8kzJYobRXRuptF5Tj",
                     project_name="baseline", workspace="gdreiman1",
                     log_code=True)
    
    import pickle
    import pandas as pd
    import lightgbm as lgb
    import numpy as np
    import sklearn
    import matplotlib.pyplot as plt
    from sklearn.metrics import precision_recall_fscore_support as prf
    #%%
    def single_roc(y_preds,y_true):
        
        from sklearn.metrics import roc_curve, auc,precision_recall_curve
        fpr, tpr, _ = roc_curve(y_true, y_preds)
        roc_auc = auc(fpr, tpr)
        plt.figure()
        lw = 2
        plt.plot(fpr, tpr, color='darkorange',
                 lw=lw, label='ROC curve (area = %0.2f)' % roc_auc)
        plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title('Receiver operating characteristic example')
        
        precision, recall, thresholds = precision_recall_curve(y_true, y_preds)
        plt.plot(recall, precision, color='blue',
                 lw=lw, label='Precision vs Recall')
        # show the plot
        plt.legend(loc="lower right")
        plt.show()
    def multi_roc(y_preds,y_true,name,n_classes):
        import collections
        nested_dict = lambda: collections.defaultdict(nested_dict)
        data_store = nested_dict()
        from sklearn.metrics import roc_curve, auc
        from scipy import interp
        from itertools import cycle
        lw = 2
        name_store = ['Active', 'Inactive', 'Inconclusive']
        fpr = dict()
        tpr = dict()
        roc_auc = dict()
        for i in range(n_classes):
            fpr[i], tpr[i], _ = roc_curve(y_true[:, i], y_preds[:, i])
            roc_auc[i] = auc(fpr[i], tpr[i])
        
        # Compute micro-average ROC curve and ROC area (flatten all classes)
        fpr["micro"], tpr["micro"], _ = roc_curve(y_true.ravel(), y_preds.ravel())
        roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])
        # Compute macro-average ROC curve and ROC area
        
        # First aggregate all false positive rates
        all_fpr = np.unique(np.concatenate([fpr[i] for i in range(n_classes)]))
        
        # Then interpolate all ROC curves at these points
        mean_tpr = np.zeros_like(all_fpr)
        for i in range(n_classes):
            mean_tpr += interp(all_fpr, fpr[i], tpr[i])
        
        # Finally average it and compute AUC
        mean_tpr /= n_classes
        
        fpr["macro"] = all_fpr
        tpr["macro"] = mean_tpr
        roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])
        
        # Plot all ROC curves
        plt.figure()
        plt.plot(fpr["micro"], tpr["micro"],
                 label='micro-average ROC curve (area = {0:0.2f})'
                       ''.format(roc_auc["micro"]),
                 color='deeppink', linestyle=':', linewidth=4)
        
        plt.plot(fpr["macro"], tpr["macro"],
                 label='macro-average ROC curve (area = {0:0.2f})'
                       ''.format(roc_auc["macro"]),
                 color='navy', linestyle=':', linewidth=4)
        
        colors = cycle(['aqua', 'darkorange', 'cornflowerblue','green'])
        for i, color in zip(range(n_classes), colors):
            plt.plot(fpr[i], tpr[i], color=color, lw=lw,
                     label='ROC curve of '+ name_store[i]+'(area = {1:0.2f})'
                     ''.format(i, roc_auc[i]))
        
        plt.plot([0, 1], [0, 1], 'k--', lw=lw)
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        #plt.title('Multi-class ROC for '+name+' Split= '+str(count+1))
        plt.title('Multi-class ROC for '+name)
    
        plt.legend(loc="lower right")
        #plt.show()
    #%%
    #save_path = r'C:\Users\gdrei\Dropbox\UCL\Thesis\May_13\AID_1345083_processed.pkl'
    model_type = 'lgbm'
    #get data cleaned
    pickle_off = open(save_path,'rb')
    activity_table=pickle.load(pickle_off)
    pickle_off.close()
    #get length of MFP
    fp_length = len(activity_table.iloc[5]['MFP'])
    
    
    from sklearn.preprocessing import StandardScaler, LabelEncoder
    scaler = StandardScaler(copy = False)
    le = LabelEncoder()
    labels = le.fit_transform(activity_table['PUBCHEM_ACTIVITY_OUTCOME'])
    #split data:
    from sklearn.model_selection import StratifiedShuffleSplit
    splitter = StratifiedShuffleSplit(n_splits=1, test_size=0.5, train_size=None, random_state=2562)
    X_mfp = np.concatenate(np.array(activity_table['MFP'])).ravel()
    X_mfp = X_mfp.reshape((-1,fp_length))
    for train_ind, test_ind in splitter.split(X_mfp,labels):
        # standardize data
        X_train_molchars_std = scaler.fit_transform(np.array(activity_table.iloc[train_ind,4:]))
        X_test_molchars_std = scaler.transform(np.array(activity_table.iloc[test_ind,4:]))
        X_train = np.concatenate((X_mfp[train_ind,:],X_train_molchars_std),axis = 1)
        X_test = np.concatenate((X_mfp[test_ind,:],X_test_molchars_std),axis = 1)
        y_train = labels[train_ind]
        y_test = labels[test_ind]
        #X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X,labels,test_size = .5, shuffle = True, stratify = labels, random_state = 2562)
        bin_y_train, bin_y_test = [1 if x ==2 else x for x in y_train],[1 if x ==2 else x for x in y_test]
        
    #do light gbm
        
    #need to make a lib svm file
    train_data = lgb.Dataset(X_train,label=y_train)
    test_data = lgb.Dataset(X_test,label=y_test)
    #make model class
    lgbm_model = lgb.LGBMClassifier(boosting_type='gbdt', num_leaves=31, max_depth=-1, learning_rate=0.1, n_estimators=500, subsample_for_bin=200000, 
                                    objective='binary', is_unbalance=True, min_split_gain=0.0, min_child_weight=0.001, min_child_samples=20, subsample=1.0, 
                                    subsample_freq=0, colsample_bytree=1.0, reg_alpha=0.0, reg_lambda=0.0, random_state=None, n_jobs=-1, silent=True, 
                                    importance_type='split')
    #train model
    trained_mod = lgbm_model.fit(X_train,y_train)
    #predict classes and class_probs
    test_class_preds = lgbm_model.predict(X_test)
    test_prob_preds = lgbm_model.predict_proba(X_test)
    #calculate Class report
    class_rep = sklearn.metrics.classification_report(y_test,test_class_preds)
    
    print(class_rep)
    if len(set(y_test)) == 2:
        single_roc(test_prob_preds[:,1],y_test)
        prec,rec,f_1,supp = prf(y_test, test_class_preds, average=None)
    else:
        from tensorflow.keras.utils import to_categorical
        multi_roc(test_prob_preds,to_categorical(y_test),'',3)
        prec,rec,f_1,supp = prf(y_test, test_class_preds, average=None)
    
    
     #%% 
    '''Comet Saving Zone'''
    #get AID number
    import ntpath
    #get base file name
    folder,base = ntpath.split(save_path)
    #split file name at the last _ ; assumes files saved as AID_xxx_endinfo.pkl
    AID, _,end_info = base.rpartition('_')
    #save data location, AID info, and version info
    exp.log_dataset_info(name = AID, version = end_info, path = save_path)
    #save model params
    exp.log_parameters(trained_mod.get_params())
    #save metrics report to comet
    if len(f_1) == 2:
        for i,name in enumerate(['Active','Inactive']):
            exp.log_metric('f1 class '+name, f_1[i])
            exp.log_metric('Recall class'+name,rec[i])
            exp.log_metric('Precision class'+name, prec[i])
    else:
        for i,name in enumerate(['Active','Inconclusive','Inactive']):
            exp.log_metric('f1 class '+str(i), f_1[i])
            exp.log_metric('Recall class'+str(i),rec[i])
            exp.log_metric('Precision class'+str(i), prec[i])
        #exp.log_metric('f1 class '+str(i), f_1[i])
        #exp.log_metric('Recall class'+str(i),rec[i])
        #exp.log_metric('Precision class'+str(i), prec[i])
    exp.log_other('Classification Report',class_rep)
     #save model in data_folder with comet experiment number associated
    exp_num = exp.get_key()
    model_save = ntpath.join(folder, model_type + '_' + exp_num + '.pkl')
    pickle_on = open(model_save,'wb')
    pickle.dump(trained_mod,pickle_on)
    pickle_on.close()
    #log trained model location
    exp.log_other('Trained Model Path',model_save)
    #save some informative tags:
    tags = [AID,end_info,model_type]
    exp.add_tags(tags)
    #save ROC curve
    exp.log_figure(figure_name = 'ROC-Pres/Recall',figure=plt)
    plt.show()

    #tell comet that the experiment is over
    exp.end()
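# Note: a quick way to exercise multi_roc outside the full pipeline, assuming
# the helper is lifted to module level (synthetic three-class scores that
# favour the true class; purely illustrative):
import numpy as np
from tensorflow.keras.utils import to_categorical

rng = np.random.RandomState(0)
y_true = rng.randint(0, 3, size=200)
y_scores = rng.rand(200, 3) + 2.0 * to_categorical(y_true, 3)
y_scores = y_scores / y_scores.sum(axis=1, keepdims=True)
multi_roc(y_scores, to_categorical(y_true, 3), 'synthetic', 3)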
Ejemplo n.º 21
0
class experiment_logger:
    '''
    Interface for logging experiments on neptune, comet, or both.
    Args: log_backend, project_name
    Other backends may also be added in the future.
    Currently defined methods:
        add_params: hyperparameter key-value pairs
        add_tags: lists of tags (strings)
        log_text: strings
        log_metrics: numerical values
        log_figure: pyplot figures
        stop: end logging and close connection
    '''
    def __init__(self, log_backend, project_name):
        '''

        Parameters
        ----------
        log_backend : STR
            One of 'comet', 'neptune', 'all'
        project_name : STR
            one of the available projects ('yeast', 'jersey', 'wheat', 'debug', etc)
            
        Returns
        -------
        None.

        '''
        self.proj_name = project_name
        self.backend = log_backend
        #Bool indicating whether neptune logging is enabled
        self.neptune = log_backend=='neptune' or log_backend=='all'
        #Bool indicating whether comet logging is enabled
        self.comet = log_backend=='comet' or log_backend=='all'
        if self.neptune:
            neptune.init("dna-i/"+project_name, 
                         api_token='eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vdWkubmVwdHVuZS5haSIsImFwaV91cmwiOiJodHRwczovL3VpLm5lcHR1bmUuYWkiLCJhcGlfa2V5IjoiMWYzMzhjMjItYjczNC00NzZhLWFlZTYtOTI2NzE5MzUwZmNkIn0=')
            print("logging experiments on neptune project "+project_name)
            neptune.create_experiment()
        if self.comet:
            self.comet_experiment = Experiment(api_key="V0OXnWOi4KVNS4OkwLjdnxSgK",
                            project_name=project_name, workspace="dna-i")
            print("logging experiments on comet project "+project_name)
        if not (self.neptune or self.comet):
            raise ValueError('Logging Backend NOT Available')    
    def add_params(self, params, step=None ):
        '''
        Adds parameters to experiment log

        Parameters
        ----------
        params : Dict
            Key-Value pairs

        Returns
        -------
        None.

        '''
        if self.neptune:
            for key, value in params.items():
                neptune.set_property(key, value)
            if step is not None:
                neptune.set_property('step', step)
        if self.comet:
            self.comet_experiment.log_parameters(params, step=step)
    def add_tags(self, tags):
        '''
        Adds tags to experiment log

        Parameters
        ----------
        tags : list
            list of tags (strings)
            e.g.: ['tag1', 'tag2']
            
        Returns
        -------
        None.

        '''
        if self.neptune:
            neptune.append_tag(tags)   
        if self.comet:
            self.comet_experiment.add_tags(tags)
 
    def log_metrics(self, name, value, epoch=None):
        '''
        Logging pointwise metrics

        Parameters
        ----------
        name : STR
            Metric key
        value : Float/Integer/(Boolean/String)
            Comet also allows Boolean/string
            Tuples are allowed
        epoch: (OPT)  INT
            Epoch - or anything used as x axis when plotting metrics

        Returns
        -------
        None.

        '''
        if self.neptune:
            try:
                if epoch is not None:
                    if type(value) is tuple:
                        print("Logging tuple as r and p-value")
                        for val, n in zip(value, [" (r)", " (p-val)"]):
                            neptune.log_metric(name  + n,epoch,y=val)
                    else:
                        neptune.log_metric(name, epoch, y=value)
                else:
                    if type(value) is tuple:
                        print("Logging tuple as r and p-value")
                        for val, n in zip(value, [" (r)", " (p-val)"]):
                            neptune.log_metric(name+n, val)
                    else:
                        neptune.log_metric(name, value)
            except:
                print("Metric type {} not supported by neptune.".format(type(value)))
                print("logging as text")
                self.log_text( "{}".format(value), key=name)
                
        if self.comet:    
            try:
                if epoch is not None:
                    if type(value) is tuple:
                        print("Logging tuple as r and p-value")
                        for val, n in zip(value, [" (r)", " (p-val)"]):
                            self.comet_experiment.log_metric(name+n, val, step=int(epoch))
                    else:
                        self.comet_experiment.log_metric(name, value, epoch=epoch)
                else:
                    if type(value) is tuple:
                        print("Logging tuple as r and p-value")
                        for val, n in zip(value, [" (r)", " (p-val)"]):
                            self.comet_experiment.log_metric(name+n, val)
                    else:
                        self.comet_experiment.log_metric(name, value)
            except:
                print("Metric type {} not supported by comet.".format(type(value)))
                if type(value) is tuple:
                    print("Logging tuple as x-y pairs")
                    for idx, val in enumerate(value):
                        self.comet_experiment.log_metric(name, val, epoch=idx) 
                else:
                    print("Logging as other.")
                    self.comet_experiment.log_other(name, value)
                
    def log_text(self, string, key=None, epoch=None):
        '''
          Logs text strings

          Parameters
          ----------
          string : STR
              text to  log
          key: STR
              log_name needed for Neptune strings 
          epoch: INT
              epoch or any other index
          
          Returns
          -------
          None.

        '''
        if self.neptune:
            if type(string) is str:
                if key is None:
                    print('Neptune log_name needed for logging text')
                    print('Using a dummy name: text')
                    key = 'text'
                if epoch is None:
                    neptune.log_text(key, string)
                else:
                    neptune.log_text(key, epoch, y=string)        
            else:
                print("Wrong type: logging text must be a string")
        if self.comet:                
            if type(string) is str:
                if key is not None:
                    print("Commet text logging does not  support keys, prepending it to text")
                    string = key+ ', '+string
                if epoch is None:
                    self.comet_experiment.log_text(string)
                else:
                    self.comet_experiment.log_text(string, step=epoch)
            else:
                print("Wrong type: logging text must be a string")
        
    def log_figure(self, figure=None, figure_name=None, step=None):
        '''
        Logs pyplot figure

        Parameters
        ----------
        figure : pyplot figure, optional in comet, mandatory in neptune.
            The default is None, uses the global pyplot figure.
        figure_name : STR, optional in comet, mandatory in neptune.
             The default is None.
        step : INT, optional
            An index. The default is None.

        Returns
        -------
        None.

        '''
        if self.neptune:
            if figure is not None:
                if figure_name is None:
                    print("Figure name must be given to neptune logger")
                    print("Using dummy name: figure")
                    figure_name = 'figure'
                if step is None:
                    neptune.log_image(figure_name, figure)
                else:
                    neptune.log_image(figure_name, step, y=figure)    
            else:
                print("A figure must be passed to neptune logger")
        if self.comet:    
            self.comet_experiment.log_figure(figure_name=figure_name, figure=figure, step=step) 
    def stop(self):
        if self.neptune:
            neptune.stop()
        if self.comet:
            self.comet_experiment.end()
        
    def add_table(self, filename, tabular_data=None, headers=False):
        # comet backend only
        if self.comet:
            self.comet_experiment.log_table(filename, tabular_data, headers)
        
    def log_image(self, image=None, figure_name=None, step=None):
        '''
        Logs an image (comet backend only)

        Parameters
        ----------
        image : array-like image data, optional
            The default is None.
        figure_name : STR, optional
             The default is None.
        step : INT, optional
            An index. The default is None.

        Returns
        -------
        None.

        '''
        if self.comet:
            self.comet_experiment.log_image(image, name=figure_name, overwrite=False,
                                            image_format="png", image_scale=1.0,
                                            image_shape=None, image_colormap=None,
                                            image_minmax=None, image_channels="last",
                                            copy_to_tmp=True, step=step)

    def log_hist3d(self, values=None, figure_name=None, step=None):
        '''
        Logs a 3D histogram of values (comet backend only)

        Parameters
        ----------
        values : array-like, optional
            Values to histogram. The default is None.
        figure_name : STR, optional
             The default is None.
        step : INT, optional
            An index. The default is None.

        Returns
        -------
        None.

        '''
        if self.neptune:
            print("not implemented")    
        if self.comet:    
            self.comet_experiment.log_histogram_3d(values, name=figure_name, step=step) 
    
    
    def log_table(self, name=None, data=None, headers=False):
        '''
        Logs tabular data to comet as a CSV table.

        Parameters
        ----------
        name : str
            Table name (saved as name.csv)
        data : array, list
            tabular data to log
        headers : TYPE, optional
            whether to use headers

        Returns
        -------
        None.

        '''
        if self.comet:
            self.comet_experiment.log_table(name + '.csv', tabular_data=data, headers=headers)
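# Note: a minimal usage sketch for experiment_logger (assumes valid comet
# credentials; 'debug' is one of the projects named in the docstring):
import matplotlib.pyplot as plt

logger = experiment_logger('comet', 'debug')
logger.add_params({'lr': 1e-3, 'epochs': 10})
logger.add_tags(['baseline'])
for epoch in range(10):
    logger.log_metrics('train_loss', 1.0 / (epoch + 1), epoch=epoch)
plt.plot([3, 2, 1])
logger.log_figure(figure=plt.gcf(), figure_name='loss_sketch', step=9)
logger.stop()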
Ejemplo n.º 22
0
                    # experiment.log_metric("d_loss", D_l)
                    # experiment.log_metric("g_loss", G_l)
                    # experiment.log_metric("current_resolution", current_resolution)
                    # experiment.log_metric("current_mode", 0 if current_mode == 'train' else 1)

                if np.isnan(D_l) or np.isnan(G_l):
                    print('loss is NaN.')
                    exit()

                if step % 1000 == 0:
                    print('epoch: {} step: {} G_loss: {} D_loss: {}'.format(epoch, step, G_l, D_l))
                    # Save figure

                    sampled_images = session.run([samples_for_all_resolutions[sizes.index(current_resolution)]])[0]

                    plot = plt.figure(figsize=(20, 10))
                    for m in range(3):
                        plt.subplot(1, 3, m + 1)
                        plt.imshow(sampled_images[m])

                    plt.savefig('./progress_images/epoch_{0}_{1}_{2}x{2}'.format(epoch, current_mode, current_resolution))
                    experiment.log_figure(figure_name='epoch_{0}_{1}_{2}x{2}'.format(epoch, current_mode, current_resolution))

                    plt.close()

                if step % 5000 == 0:
                    save_path = saver.save(session, './checkpoints/model.ckpt', global_step=total_images_looked_at)

        except tf.errors.OutOfRangeError:
            pass
Ejemplo n.º 23
0
    ## embedding tsne visualizations
    # country
    categembs = sess.run(model.categembs)
    with open('categembs.pkl', 'wb') as f:
        pickle.dump(categembs, f)
    with open('categembs.pkl', 'rb') as f:
        categembs = pickle.load(f)
    embs = categembs['country']
    tsnes = TSNE(n_components=2).fit_transform(embs)
    plt.figure(figsize=(8, 8))
    plt.plot(*tsnes.T, '.')
    for i, tsne in enumerate(tsnes):
        plt.text(*tsne, ' ' + model.categs['country'][i], fontsize=8)
    plt.gca().axis('equal')
    plt.tight_layout()
    print(experiment.log_figure(step=epoch))

    # AS
    embs = categembs['as'][:300]
    tsnes = TSNE(n_components=2).fit_transform(embs)
    plt.figure(figsize=(16, 16))
    plt.plot(*tsnes.T, '.')
    for i, tsne in enumerate(tsnes):
        plt.text(*tsne, ' ' + model.categs['as'][i][3:23], fontsize=8)
    plt.gca().axis('equal')
    plt.tight_layout()
    print(experiment.log_figure(step=epoch))

    # subnet
    embs = categembs['subnet']
    tsnes = TSNE(n_components=2).fit_transform(embs)
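    # assumed continuation (the snippet is cut off after the TSNE fit),
    # mirroring the country and AS blocks above:
    plt.figure(figsize=(16, 16))
    plt.plot(*tsnes.T, '.')
    for i, tsne in enumerate(tsnes):
        plt.text(*tsne, ' ' + model.categs['subnet'][i], fontsize=8)
    plt.gca().axis('equal')
    plt.tight_layout()
    print(experiment.log_figure(step=epoch))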
Ejemplo n.º 24
0
        progbar.add(valid_inc, [('Train Loss', metrics['train_loss']),
                                ('Validation Loss', metrics['valid_loss']),
                                ('Time (s)', step_time)])
        #Plot on Comet
        experiment.log_metrics(metrics, step=t)
        # Plot on WandB
        wandb.log(metrics, step=t)

    if (t + 1) % save_inc == 0:
        trainer.save_weights(model_path,
                             run_id=wandb.run.id,
                             experiment_key=experiment.get_key())
        if not args.gcbc and not args.images:
            z_enc, z_plan = produce_cluster_fig(next(plotting_dataset),
                                                encoder,
                                                planner,
                                                TEST_DATA_PATHS[0],
                                                num_take=dl.batch_size // 4)

            #Comet
            experiment.log_figure('z_enc', z_enc, step=t)
            experiment.log_figure('z_plan', z_plan, step=t)

            # WandB
            wandb.log({'z_enc': z_enc, 'z_plan': z_plan}, step=t)

            #latent_fig = project_enc_and_plan(ze, zp)
            #latent_img = plot_to_image(latent_fig)

    t += 1
Ejemplo n.º 25
0
                        input_shape=(1,))

    model.add(rbflayer)
    model.add(Dense(1))
    model.compile(loss='mse',
                  optimizer=_OPTIMIZER)
    model.fit(oDataSet.attributes[oData.Training_indexes],
              oDataSet.labels[oData.Training_indexes],
              batch_size=50,
              epochs=epochs,
              verbose=1)

    y_pred = model.predict(oDataSet.attributes[oData.Testing_indexes])
    y_true = oDataSet.labels[oData.Testing_indexes]
    plt.plot(model.history.history['loss'])
    experiment.log_figure(figure=plt, figure_name='Loss curve')
    plt.show()

    random_matrix = np.random.random((1000,1))
    plt.scatter(random_matrix, model.predict(random_matrix), label="Model curve")
    plt.scatter(oDataSet.attributes, oDataSet.labels, label="Dataset curve")
    plt.legend(loc='upper left', bbox_to_anchor=(1.04, 1))
    experiment.log_figure(figure=plt, figure_name="surface")
    plt.show()

    experiment.log_metric("test_accuracy", mean_squared_error(y_true, y_pred))
    experiment.log_metric("test_accuracy_rmse", np.sqrt(mean_squared_error(y_true, y_pred)))
    experiment.log_metric("beta", best_b)
    experiment.log_metric("neurons", best_p)
    model.save('model.h5')
    experiment.log_asset("model.h5")
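# Note: rbflayer comes from elsewhere in the script. A minimal Gaussian RBF
# layer sketch in Keras (hypothetical, not the library class used above): each
# unit outputs exp(-beta * ||x - c||^2) for a trainable centre c.
import tensorflow as tf
from tensorflow.keras.layers import Layer

class RBFLayer(Layer):
    def __init__(self, units, beta=1.0, **kwargs):
        super().__init__(**kwargs)
        self.units = units
        self.beta = beta

    def build(self, input_shape):
        self.centers = self.add_weight(name='centers',
                                       shape=(self.units, input_shape[-1]),
                                       initializer='random_normal',
                                       trainable=True)
        super().build(input_shape)

    def call(self, inputs):
        diff = tf.expand_dims(inputs, 1) - self.centers   # (batch, units, dim)
        return tf.exp(-self.beta * tf.reduce_sum(tf.square(diff), axis=-1))

# e.g. model.add(RBFLayer(10, beta=2.0, input_shape=(1,)))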
Ejemplo n.º 26
0
def train(normal_digit, anomalies, folder, file, p_train, p_test):

    # Create an experiment
    experiment = Experiment(project_name="deep-stats-thesis",
                            workspace="stecaron",
                            disabled=True)
    experiment.add_tag("mnist_conv_ae")

    # General parameters
    DOWNLOAD_MNIST = True
    PATH_DATA = os.path.join(os.path.expanduser("~"), 'Downloads/mnist')
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Define training parameters
    hyper_params = {
        "EPOCH": 75,
        "NUM_WORKERS": 10,
        "BATCH_SIZE": 256,
        "LR": 0.001,
        "TRAIN_SIZE": 4000,
        "TRAIN_NOISE": p_train,
        "TEST_SIZE": 800,
        "TEST_NOISE": p_test,
        # on which class we want to learn outliers
        "CLASS_SELECTED": [normal_digit],
        # which class we want to corrupt our dataset with
        "CLASS_CORRUPTED": anomalies,
        "ALPHA": p_test,
        "MODEL_NAME": "mnist_ae_model",
        "LOAD_MODEL": False,
        "LOAD_MODEL_NAME": "mnist_ae_model"
    }

    # Log experiment parameters
    experiment.log_parameters(hyper_params)

    # Load data
    train_data, test_data = load_mnist(PATH_DATA, download=DOWNLOAD_MNIST)

    # Train the autoencoder
    model = ConvAutoEncoder2()
    optimizer = torch.optim.Adam(model.parameters(), lr=hyper_params["LR"])
    #loss_func = nn.MSELoss()
    loss_func = nn.BCELoss()

    # Build "train" and "test" datasets
    id_maj_train = numpy.random.choice(numpy.where(
        numpy.isin(train_data.train_labels, hyper_params["CLASS_SELECTED"]))[0],
        int((1 - hyper_params["TRAIN_NOISE"]) *
            hyper_params["TRAIN_SIZE"]),
        replace=False)
    id_min_train = numpy.random.choice(numpy.where(
        numpy.isin(train_data.train_labels, hyper_params["CLASS_CORRUPTED"]))[0],
        int(hyper_params["TRAIN_NOISE"] *
            hyper_params["TRAIN_SIZE"]),
        replace=False)
    id_train = numpy.concatenate((id_maj_train, id_min_train))

    id_maj_test = numpy.random.choice(numpy.where(
        numpy.isin(test_data.test_labels, hyper_params["CLASS_SELECTED"]))[0],
        int((1 - hyper_params["TEST_NOISE"]) *
            hyper_params["TEST_SIZE"]),
        replace=False)
    id_min_test = numpy.random.choice(numpy.where(
        numpy.isin(test_data.test_labels, hyper_params["CLASS_CORRUPTED"]))[0],
        int(hyper_params["TEST_NOISE"] *
            hyper_params["TEST_SIZE"]),
        replace=False)
    id_test = numpy.concatenate((id_min_test, id_maj_test))

    train_data.data = train_data.data[id_train]
    train_data.targets = train_data.targets[id_train]

    test_data.data = test_data.data[id_test]
    test_data.targets = test_data.targets[id_test]

    train_data.targets = torch.from_numpy(
        numpy.isin(train_data.train_labels,
                   hyper_params["CLASS_CORRUPTED"])).type(torch.int32)
    test_data.targets = torch.from_numpy(
        numpy.isin(test_data.test_labels,
                   hyper_params["CLASS_CORRUPTED"])).type(torch.int32)

    train_loader = Data.DataLoader(dataset=train_data,
                                   batch_size=hyper_params["BATCH_SIZE"],
                                   shuffle=True,
                                   num_workers=hyper_params["NUM_WORKERS"])

    test_loader = Data.DataLoader(dataset=test_data,
                                  batch_size=test_data.data.shape[0],
                                  shuffle=False,
                                  num_workers=hyper_params["NUM_WORKERS"])
    model.train()
    if hyper_params["LOAD_MODEL"]:
        model = torch.load(hyper_params["LOAD_MODEL_NAME"])
    else:
        train_mnist(train_loader,
                    model,
                    criterion=optimizer,
                    n_epoch=hyper_params["EPOCH"],
                    experiment=experiment,
                    device=device,
                    model_name=hyper_params["MODEL_NAME"],
                    loss_func=loss_func,
                    loss_type="binary")

    # Compute p-values
    model.to(device)
    pval, test_errors = compute_reconstruction_pval(
        train_loader, model, test_loader, device)
    pval_order = numpy.argsort(pval)

    # Plot p-values
    x_line = numpy.arange(0, len(test_data), step=1)
    y_line = numpy.linspace(0, 1, len(test_data))
    y_adj = numpy.arange(0, len(test_data),
                         step=1) / len(test_data) * hyper_params["ALPHA"]
    zoom = int(0.2 * len(test_data))  # nb of points to zoom

    #index = numpy.isin(test_data.test_labels, hyper_params["CLASS_CORRUPTED"]).astype(int)
    index = numpy.array(test_data.targets).astype(int)

    fig, (ax1, ax2) = plt.subplots(2, 1)

    ax1.scatter(numpy.arange(0, len(pval), 1),
                pval[pval_order],
                c=index[pval_order].reshape(-1))
    ax1.plot(x_line, y_line, color="green")
    ax1.plot(x_line, y_adj, color="red")
    ax1.set_title(
        f'Entire test dataset with {int(hyper_params["TEST_NOISE"] * 100)}% of noise'
    )
    ax1.set_xticklabels([])

    ax2.scatter(numpy.arange(0, zoom, 1),
                pval[pval_order][0:zoom],
                c=index[pval_order].reshape(-1)[0:zoom])
    ax2.plot(x_line[0:zoom], y_line[0:zoom], color="green")
    ax2.plot(x_line[0:zoom], y_adj[0:zoom], color="red")
    ax2.set_title('Zoomed in')
    ax2.set_xticklabels([])

    experiment.log_figure(figure_name="empirical_test_hypothesis",
                          figure=fig,
                          overwrite=True)
    plt.savefig(os.path.join(folder, "pvalues_" + file + ".png"))
    plt.show()

    # Compute some stats
    precision, recall, f1_score, average_precision, roc_auc = test_performances(
        pval, index, hyper_params["ALPHA"])
    print(f"Precision: {precision}")
    print(f"Recall: {recall}")
    print(f"F1 Score: {f1_score}")
    print(f"AUC: {roc_auc}")
    print(f"Average Precison: {average_precision}")
    experiment.log_metric("precision", precision)
    experiment.log_metric("recall", recall)
    experiment.log_metric("f1_score", f1_score)
    experiment.log_metric("auc", roc_auc)
    experiment.log_metric("average_precision", average_precision)

    # Show some examples

    fig, axs = plt.subplots(5, 5)
    fig.tight_layout()
    axs = axs.ravel()

    for i in range(25):
        image = test_data.data[pval_order[i]]
        axs[i].imshow(image, cmap='gray')
        axs[i].axis('off')

    experiment.log_figure(figure_name="rejetcted_observations",
                          figure=fig,
                          overwrite=True)
    plt.show()

    fig, axs = plt.subplots(5, 5)
    fig.tight_layout()
    axs = axs.ravel()

    for i in range(25):
        image = test_data.data[pval_order[int(len(pval) - 1) - i]]
        axs[i].imshow(image, cmap='gray')
        axs[i].axis('off')

    experiment.log_figure(figure_name="better_observations",
                          figure=fig,
                          overwrite=True)
    plt.show()

    # Save the results in the output file
    col_names = ["timestamp", "precision", "recall", "f1_score",
                 "average_precision", "auc"]
    results_file = os.path.join(folder, "results_" + file + ".csv")
    if os.path.exists(results_file):
        df_results = pandas.read_csv(results_file, names=col_names, header=0)
    else:
        df_results = pandas.DataFrame(columns=col_names)

    # DataFrame.append was removed in pandas 2.0; use concat instead
    df_results = pandas.concat(
        [df_results,
         pandas.DataFrame(
             numpy.concatenate(
                 (numpy.array(
                     datetime.datetime.fromtimestamp(
                         time.time()).strftime('%Y-%m-%d %H:%M:%S')).reshape(1),
                  precision.reshape(1), recall.reshape(1),
                  f1_score.reshape(1), average_precision.reshape(1),
                  roc_auc.reshape(1))).reshape(1, -1), columns=col_names)],
        ignore_index=True)

    df_results.to_csv(results_file)
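# Note: compute_reconstruction_pval is imported from elsewhere. A sketch of the
# idea as it is used above: an empirical p-value per test point, the share of
# training reconstruction errors at least as large as the test error. Assumes
# the model returns the reconstruction directly and that the loaders yield
# (input, target) batches already shaped for the model.
import numpy
import torch

def _reconstruction_errors(loader, model, device):
    errs = []
    model.eval()
    with torch.no_grad():
        for x, _ in loader:
            x = x.to(device).float()
            x_hat = model(x)
            per_sample = torch.nn.functional.mse_loss(
                x_hat, x, reduction='none').flatten(1).mean(1)
            errs.extend(per_sample.cpu().numpy())
    return numpy.array(errs)

def compute_reconstruction_pval(train_loader, model, test_loader, device):
    train_err = _reconstruction_errors(train_loader, model, device)
    test_err = _reconstruction_errors(test_loader, model, device)
    pval = numpy.array([(train_err >= e).mean() for e in test_err])
    return pval, test_err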
Ejemplo n.º 27
0
def train(folder, file, p_train, p_test):

    # Create an experiment
    experiment = Experiment(project_name="deep-stats-thesis",
                            workspace="stecaron",
                            disabled=True)
    experiment.add_tag("cars_dogs")

    # General parameters
    PATH_DATA_CARS = os.path.join(os.path.expanduser("~"),
                                  'data/stanford_cars')
    PATH_DATA_DOGS = os.path.join(os.path.expanduser("~"),
                                  'data/stanford_dogs2')

    MEAN = [0.485, 0.456, 0.406]
    STD = [0.229, 0.224, 0.225]
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Define training parameters
    hyper_params = {
        "IMAGE_SIZE": (128, 128),
        "NUM_WORKERS": 10,
        "EPOCH": 20,
        "BATCH_SIZE": 130,
        "LR": 0.001,
        "TRAIN_SIZE": 10000,
        "TRAIN_NOISE": p_train,
        "TEST_SIZE": 1000,
        "TEST_NOISE": p_test,
        "LATENT_DIM": 25,  # latent distribution dimensions
        "ALPHA": p_test,  # level of significance for the test
        "BETA_epoch": [5, 10, 15],
        "BETA": [0, 100, 10],  # hyperparameter to weight KLD vs RCL
        "MODEL_NAME": "vae_model_cars",
        "LOAD_MODEL": False,
        "LOAD_MODEL_NAME": "vae_model_carsscenario_cars_plus"
    }

    # Log experiment parameters
    experiment.log_parameters(hyper_params)

    # Define some transformations
    transform = transforms.Compose([
        transforms.Resize((128, 128)),
        #transforms.CenterCrop((128, 128)),
        transforms.ToTensor(),
        transforms.Normalize(mean=MEAN, std=STD)
    ])

    # Load data
    train_x_files, test_x_files, train_y, test_y = define_filenames(
        PATH_DATA_DOGS, PATH_DATA_CARS, hyper_params["TRAIN_SIZE"],
        hyper_params["TEST_SIZE"], hyper_params["TRAIN_NOISE"],
        hyper_params["TEST_NOISE"])

    train_data = DataGenerator(train_x_files,
                               train_y,
                               transform=transform,
                               image_size=hyper_params["IMAGE_SIZE"])

    test_data = DataGenerator(test_x_files,
                              test_y,
                              transform=transform,
                              image_size=hyper_params["IMAGE_SIZE"])

    train_loader = Data.DataLoader(dataset=train_data,
                                   batch_size=hyper_params["BATCH_SIZE"],
                                   shuffle=True,
                                   num_workers=hyper_params["NUM_WORKERS"])

    test_loader = Data.DataLoader(dataset=test_data,
                                  batch_size=1,
                                  shuffle=False,
                                  num_workers=hyper_params["NUM_WORKERS"])

    # Load model
    model = SmallCarsConvVAE128(z_dim=hyper_params["LATENT_DIM"])
    optimizer = torch.optim.Adam(model.parameters(), lr=hyper_params["LR"])

    model.to(device)

    model_save = os.path.join(folder, hyper_params["MODEL_NAME"] + file)

    # Train the model
    if hyper_params["LOAD_MODEL"]:
        model.load_state_dict(
            torch.load(f'{hyper_params["LOAD_MODEL_NAME"]}.h5'))
    else:
        train_mnist_vae(
            train_loader,
            # test_loader,
            model,
            criterion=optimizer,
            n_epoch=hyper_params["EPOCH"],
            experiment=experiment,
            beta_list=hyper_params["BETA"],
            beta_epoch=hyper_params["BETA_epoch"],
            model_name=model_save,
            device=device,
            loss_type="perceptual",
            flatten=False)

    # Compute p-values
    model.to(device)
    pval, _ = compute_pval_loaders(train_loader,
                                   test_loader,
                                   model,
                                   device=device,
                                   experiment=experiment,
                                   file=file,
                                   folder=folder)

    pval = 1 - pval  # we test on the tail
    pval_order = numpy.argsort(pval)

    # Plot p-values
    x_line = numpy.arange(0, len(test_data), step=1)
    y_line = numpy.linspace(0, 1, len(test_data))
    y_adj = numpy.arange(0, len(test_data),
                         step=1) / len(test_data) * hyper_params["ALPHA"]
    zoom = int(0.2 * len(test_data))  # nb of points to zoom

    index = test_data.labels

    fig, (ax1, ax2) = plt.subplots(2, 1)

    ax1.scatter(numpy.arange(0, len(pval), 1),
                pval[pval_order],
                c=index[pval_order].reshape(-1))
    ax1.plot(x_line, y_line, color="green")
    ax1.axhline(hyper_params["ALPHA"], color="red")
    #ax1.plot(x_line, y_adj, color="red")
    ax1.set_ylabel(r"Score $(1 - \gamma)$")
    ax1.set_title(
        f'Test dataset with {int(hyper_params["TEST_NOISE"] * 100)}% contamination'
    )
    ax1.set_xticklabels([])

    ax2.scatter(numpy.arange(0, zoom, 1),
                pval[pval_order][0:zoom],
                c=index[pval_order].reshape(-1)[0:zoom])
    ax2.plot(x_line[0:zoom], y_line[0:zoom], color="green")
    ax2.axhline(hyper_params["ALPHA"], color="red")
    #ax2.plot(x_line[0:zoom], y_adj[0:zoom], color="red")
    ax2.set_ylabel(r"Score $(1 - \gamma)$")
    ax2.set_title('Zoomed in')
    ax2.set_xticklabels([])

    experiment.log_figure(figure_name="empirical_test_hypothesis",
                          figure=fig,
                          overwrite=True)
    plt.savefig(os.path.join(folder, "pvalues_" + file + ".pdf"))
    plt.show()

    # Compute some stats
    precision, recall, f1_score, average_precision, roc_auc = test_performances(
        pval, index, hyper_params["ALPHA"])
    print(f"Precision: {precision}")
    print(f"Recall: {recall}")
    print(f"F1-Score: {f1_score}")
    print(f"Average precision: {average_precision}")
    print(f"AUC: {roc_auc}")
    experiment.log_metric("precision", precision)
    experiment.log_metric("recall", recall)
    experiment.log_metric("F1-Score", f1_score)
    experiment.log_metric("average_precision", average_precision)
    experiment.log_metric("AUC", roc_auc)

    # Show some examples

    plt.rcParams['figure.figsize'] = [10, 10]
    fig, axs = plt.subplots(4, 4)
    fig.tight_layout()
    axs = axs.ravel()

    for i in range(16):
        image = test_data[pval_order[i]][0].transpose_(0, 2)
        image = denormalize(image, MEAN, STD, device=device).numpy()
        axs[i].imshow(image)
        axs[i].axis('off')

    experiment.log_figure(figure_name="rejetcted_observations",
                          figure=fig,
                          overwrite=True)
    plt.savefig(os.path.join(folder, "rejected_observations_" + file + ".pdf"))
    plt.show()

    fig, axs = plt.subplots(4, 4)
    fig.tight_layout()
    axs = axs.ravel()

    for i in range(16):
        image = test_data[pval_order[int(len(pval) - 1) - i]][0].transpose_(
            0, 2)
        image = denormalize(image, MEAN, STD, device=device).numpy()
        axs[i].imshow(image)
        axs[i].axis('off')

    experiment.log_figure(figure_name="better_observations",
                          figure=fig,
                          overwrite=True)
    plt.savefig(os.path.join(folder, "better_observations_" + file + ".pdf"))
    plt.show()

    # Plot some errors
    preds = numpy.zeros(index.shape[0])
    preds[numpy.argwhere(pval <= hyper_params["ALPHA"])] = 1
    false_positive = numpy.where((index != preds) & (index == 1))[0]
    nb_errors = numpy.min([16, false_positive.shape[0]])

    sample_errors = numpy.random.choice(false_positive,
                                        nb_errors,
                                        replace=False)
    fig, axs = plt.subplots(4, 4)
    fig.tight_layout()
    axs = axs.ravel()

    for i in range(nb_errors):
        image = test_data[sample_errors[i]][0].transpose_(0, 2)
        image = denormalize(image, MEAN, STD, device=device).numpy()
        axs[i].imshow(image)
        axs[i].axis('off')

    plt.savefig(os.path.join(folder, "false_positive_sample_" + file + ".pdf"))
    plt.show()

    false_negative = numpy.where((index != preds) & (index == 0))[0]
    nb_errors = numpy.min([16, false_negative.shape[0]])

    sample_errors = numpy.random.choice(false_negative,
                                        nb_errors,
                                        replace=False)
    fig, axs = plt.subplots(4, 4)
    fig.tight_layout()
    axs = axs.ravel()

    for i in range(nb_errors):
        image = test_data[sample_errors[i]][0].transpose_(0, 2)
        image = denormalize(image, MEAN, STD, device=device).numpy()
        axs[i].imshow(image)
        axs[i].axis('off')

    plt.savefig(os.path.join(folder, "false_negative_sample_" + file + ".pdf"))
    plt.show()

    # Save the results in the output file
    col_names = [
        "timestamp", "precision", "recall", "f1_score", "average_precision",
        "auc"
    ]
    results_file = os.path.join(folder, "results_" + file + ".csv")
    if os.path.exists(results_file):
        df_results = pandas.read_csv(results_file, names=col_names, header=0)
    else:
        df_results = pandas.DataFrame(columns=col_names)

    # DataFrame.append was removed in pandas 2.0; use concat instead
    df_results = pandas.concat(
        [df_results,
         pandas.DataFrame(numpy.concatenate(
             (numpy.array(
                 datetime.datetime.fromtimestamp(
                     time.time()).strftime('%Y-%m-%d %H:%M:%S')).reshape(1),
              precision.reshape(1), recall.reshape(1), f1_score.reshape(1),
              average_precision.reshape(1), roc_auc.reshape(1))).reshape(1, -1),
             columns=col_names)],
        ignore_index=True)

    df_results.to_csv(results_file)
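# Note: denormalize is imported from elsewhere. A minimal sketch that inverts
# transforms.Normalize for the channel-last images produced by the transpose_
# calls above (an assumption, not the author's exact helper):
import torch

def denormalize(image, mean, std, device="cpu"):
    mean = torch.tensor(mean, device=device)
    std = torch.tensor(std, device=device)
    # x_norm * std + mean undoes (x - mean) / std; clamp for plotting
    return (image.to(device) * std + mean).clamp(0, 1).cpu()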
Ejemplo n.º 28
0
def train(rank, defparams, hyper):

    params = {}
    for param in defparams.keys():
        params[param] = defparams[param]

    hyperp = {}
    for hp in hyper.keys():
        hyperp[hp] = hyper[hp]

    experiment = Experiment(api_key="keGmeIz4GfKlQZlOP6cit4QOi",
                            project_name="hadron-shower",
                            workspace="engineren")
    experiment.add_tag(params['exp'])

    experiment.log_parameters(hyperp)

    device = torch.device("cuda")
    torch.manual_seed(params["seed"])

    world_size = int(os.environ["SLURM_NNODES"])
    rank = int(os.environ["SLURM_PROCID"])

    dist.init_process_group(backend='nccl',
                            world_size=world_size,
                            rank=rank,
                            init_method=params["DDP_init_file"])

    aD = DCGAN_D(hyperp["ndf"]).to(device)
    aG = DCGAN_G(hyperp["ngf"], hyperp["z"]).to(device)
    aE = energyRegressor().to(device)
    aP = PostProcess_Size1Conv_EcondV2(48,
                                       13,
                                       3,
                                       128,
                                       bias=True,
                                       out_funct='none').to(device)

    optimizer_g = torch.optim.Adam(aG.parameters(),
                                   lr=hyperp["L_gen"],
                                   betas=(0.5, 0.9))
    optimizer_d = torch.optim.Adam(aD.parameters(),
                                   lr=hyperp["L_crit"],
                                   betas=(0.5, 0.9))
    optimizer_e = torch.optim.SGD(aE.parameters(), lr=hyperp["L_calib"])
    optimizer_p = torch.optim.Adam(aP.parameters(),
                                   lr=hyperp["L_post"],
                                   betas=(0.5, 0.9))

    assert torch.backends.cudnn.enabled, "NVIDIA/Apex:Amp requires cudnn backend to be enabled."
    torch.backends.cudnn.benchmark = True

    # Initialize Amp
    models, optimizers = amp.initialize([aG, aD], [optimizer_g, optimizer_d],
                                        opt_level="O1",
                                        num_losses=2)

    #aD = nn.DataParallel(aD)
    #aG = nn.DataParallel(aG)
    #aE = nn.DataParallel(aE)

    aG, aD = models
    optimizer_g, optimizer_d = optimizers

    aG = nn.parallel.DistributedDataParallel(aG, device_ids=[0])
    aD = nn.parallel.DistributedDataParallel(aD, device_ids=[0])
    aE = nn.parallel.DistributedDataParallel(aE, device_ids=[0])
    aP = nn.parallel.DistributedDataParallel(aP, device_ids=[0])

    experiment.set_model_graph(str(aG), overwrite=False)
    experiment.set_model_graph(str(aD), overwrite=False)

    if params["restore_pp"]:
        aP.load_state_dict(
            torch.load(params["restore_path_PP"] + params["post_saved"],
                       map_location=torch.device(device)))

    if params["restore"]:
        checkpoint = torch.load(params["restore_path"])
        aG.load_state_dict(checkpoint['Generator'])
        aD.load_state_dict(checkpoint['Critic'])
        optimizer_g.load_state_dict(checkpoint['G_optimizer'])
        optimizer_d.load_state_dict(checkpoint['D_optimizer'])
        itr = checkpoint['iteration']

    else:
        aG.apply(weights_init)
        aD.apply(weights_init)
        itr = 0

    if params["c0"]:
        aE.apply(weights_init)
    elif params["c1"]:
        aE.load_state_dict(
            torch.load(params["calib_saved"],
                       map_location=torch.device(device)))

    one = torch.tensor(1.0).to(device)
    mone = (one * -1).to(device)

    print('loading data...')
    paths_list = [
        '/beegfs/desy/user/eren/data_generator/pion/hcal_only/pion40part1.hdf5',
        '/beegfs/desy/user/eren/data_generator/pion/hcal_only/pion40part2.hdf5',
        '/beegfs/desy/user/eren/data_generator/pion/hcal_only/pion40part3.hdf5',
        '/beegfs/desy/user/eren/data_generator/pion/hcal_only/pion40part4.hdf5',
        '/beegfs/desy/user/eren/data_generator/pion/hcal_only/pion40part5.hdf5',
        '/beegfs/desy/user/eren/data_generator/pion/hcal_only/pion40part6.hdf5',
        '/beegfs/desy/user/eren/data_generator/pion/hcal_only/pion40part7.hdf5'
    ]

    train_data = PionsDataset(paths_list, core=True)

    train_sampler = torch.utils.data.distributed.DistributedSampler(
        train_data, num_replicas=world_size, rank=rank)

    dataloader = DataLoader(train_data,
                            batch_size=hyperp["batch_size"],
                            num_workers=0,
                            shuffle=False,
                            drop_last=True,
                            pin_memory=True,
                            sampler=train_sampler)

    print('done')

    #scheduler_g = optim.lr_scheduler.StepLR(optimizer_g, step_size=1, gamma=params["gamma_g"])
    #scheduler_d = optim.lr_scheduler.StepLR(optimizer_d, step_size=1, gamma=params["gamma_crit"])
    #scheduler_e = optim.lr_scheduler.StepLR(optimizer_e, step_size=1, gamma=params["gamma_calib"])

    #writer = SummaryWriter()

    e_criterion = nn.L1Loss()  # for energy regressor training

    dataiter = iter(dataloader)

    BATCH_SIZE = hyperp["batch_size"]
    LATENT = hyperp["z"]
    EXP = params["exp"]
    KAPPA = hyperp["kappa"]
    LAMBD = hyperp["lambda"]
    ## Post-Processing
    LDP = hyperp["LDP"]
    wMMD = hyperp["wMMD"]
    wMSE = hyperp["wMSE"]

    ## IO paths
    OUTP = params['output_path']

    for iteration in range(50000):

        iteration += itr + 1
        #---------------------TRAIN D------------------------
        for p in aD.parameters():  # reset requires_grad
            p.requires_grad_(True)  # they are set to False below in training G

        for e in aE.parameters():  # reset requires_grad (constrainer)
            e.requires_grad_(True)  # they are set to False below in training G

        for i in range(hyperp["ncrit"]):

            aD.zero_grad()
            aE.zero_grad()

            noise = np.random.uniform(-1, 1, (BATCH_SIZE, LATENT))
            noise = torch.from_numpy(noise).float()
            noise = noise.view(
                -1, LATENT, 1, 1,
                1)  #[BS, nz]  --> [Bs,nz,1,1,1] Needed for Generator
            noise = noise.to(device)

            batch = next(dataiter, None)

            if batch is None:
                dataiter = iter(dataloader)
                batch = next(dataiter)

            real_label = batch['energy']  ## energy label
            real_label = real_label.to(device)

            with torch.no_grad():
                noisev = noise  # totally freeze G, training D

            fake_data = aG(noisev, real_label).detach()

            real_data = batch['shower']  # 48x48x48 calo image
            real_data = real_data.to(device)
            real_data.requires_grad_(True)

            #### supervised-training for energy regressor!
            if params["train_calib"]:
                output = aE(real_data.float())
                e_loss = e_criterion(output, real_label.view(BATCH_SIZE, 1))
                e_loss.backward()
                optimizer_e.step()

            ######

            # train with real data

            disc_real = aD(real_data.float(), real_label.float())

            # train with fake data
            fake_data = fake_data.unsqueeze(
                1)  ## transform to [BS, 1, 48, 48, 48]
            disc_fake = aD(fake_data, real_label.float())

            # train with interpolated data
            gradient_penalty = calc_gradient_penalty(aD,
                                                     real_data.float(),
                                                     fake_data,
                                                     real_label,
                                                     BATCH_SIZE,
                                                     device,
                                                     DIM=13)

            ## wasserstein-1 distace
            w_dist = torch.mean(disc_fake) - torch.mean(disc_real)
            # final disc cost
            disc_cost = torch.mean(disc_fake) - torch.mean(
                disc_real) + LAMBD * gradient_penalty

            with amp.scale_loss(disc_cost, optimizer_d) as scaled_loss:
                scaled_loss.backward()

            optimizer_d.step()

            #--------------Log to COMET ML ----------
            if i == hyperp["ncrit"] - 1:
                experiment.log_metric("L_crit", disc_cost, step=iteration)
                experiment.log_metric("gradient_pen",
                                      gradient_penalty,
                                      step=iteration)
                experiment.log_metric("Wasserstein Dist",
                                      w_dist,
                                      step=iteration)
                if params["train_calib"]:
                    experiment.log_metric("L_const", e_loss, step=iteration)

        #---------------------TRAIN G------------------------
        for p in aD.parameters():
            p.requires_grad_(False)  # freeze D

        for c in aE.parameters():
            c.requires_grad_(False)  # freeze C

        gen_cost = None
        for i in range(hyperp["ngen"]):

            aG.zero_grad()

            noise = np.random.uniform(-1, 1, (BATCH_SIZE, LATENT))
            noise = torch.from_numpy(noise).float()
            noise = noise.view(
                -1, LATENT, 1, 1,
                1)  #[BS, nz]  --> [Bs,nz,1,1,1] Needed for Generator
            noise = noise.to(device)

            batch = next(dataiter, None)

            if batch is None:
                dataiter = iter(dataloader)
                batch = next(dataiter)

            real_label = batch['energy']  ## energy label
            real_label = real_label.to(device)

            noise.requires_grad_(True)

            real_data = batch['shower']  # 48x48x48 calo image
            real_data = real_data.to(device)

            fake_data = aG(noise, real_label.float())
            fake_data = fake_data.unsqueeze(
                1)  ## transform to [BS, 1, 48, 48, 48]

            ## calculate loss function
            gen_cost = aD(fake_data.float(), real_label.float())

            ## label conditioning
            #output_g = aE(fake_data)
            #output_r = aE(real_data.float())

            output_g = 0.0  #for now
            output_r = 0.0  #for now

            aux_fake = (output_g - real_label)**2
            aux_real = (output_r - real_label)**2

            aux_errG = torch.abs(aux_fake - aux_real)

            ## Total loss function for generator
            g_cost = -torch.mean(gen_cost) + KAPPA * torch.mean(aux_errG)

            with amp.scale_loss(g_cost, optimizer_g) as scaled_loss_G:
                scaled_loss_G.backward()

            optimizer_g.step()

            #--------------Log to COMET ML ----------
            experiment.log_metric("L_Gen", g_cost, step=iteration)

            ## plot example image
            if iteration % 100 == 0 or iteration == 1:
                image = fake_data.view(-1, 48, 13, 13).cpu().detach().numpy()
                cmap = mpl.cm.viridis.copy()  # copy so set_bad does not mutate the registered map
                cmap.set_bad('white', 1.)
                figExIm = plt.figure(figsize=(6, 6))
                axExIm1 = figExIm.add_subplot(1, 1, 1)
                image1 = np.sum(image[0], axis=0)
                masked_array1 = np.ma.array(image1, mask=(image1 == 0.0))
                im1 = axExIm1.imshow(masked_array1,
                                     filternorm=False,
                                     interpolation='none',
                                     cmap=cmap,
                                     norm=mpl.colors.LogNorm(vmin=0.01, vmax=100),
                                     origin='lower')
                figExIm.patch.set_facecolor('white')
                axExIm1.set_xlabel('y [cells]', family='serif')
                axExIm1.set_ylabel('x [cells]', family='serif')
                figExIm.colorbar(im1)

                experiment.log_figure(figure=plt, figure_name="x-y")

                figExIm = plt.figure(figsize=(6, 6))
                axExIm2 = figExIm.add_subplot(1, 1, 1)
                image2 = np.sum(image[0], axis=1)
                masked_array2 = np.ma.array(image2, mask=(image2 == 0.0))
                im2 = axExIm2.imshow(masked_array2,
                                     filternorm=False,
                                     interpolation='none',
                                     cmap=cmap,
                                     norm=mpl.colors.LogNorm(vmin=0.01, vmax=100),
                                     origin='lower')
                figExIm.patch.set_facecolor('white')
                axExIm2.set_xlabel('y [cells]', family='serif')
                axExIm2.set_ylabel('z [layers]', family='serif')
                figExIm.colorbar(im2)

                experiment.log_figure(figure=plt, figure_name="y-z")

                figExIm = plt.figure(figsize=(6, 6))
                axExIm3 = figExIm.add_subplot(1, 1, 1)
                image3 = np.sum(image[0], axis=2)
                masked_array3 = np.ma.array(image3, mask=(image3 == 0.0))
                im3 = axExIm3.imshow(masked_array3,
                                     filternorm=False,
                                     interpolation='none',
                                     cmap=cmap,
                                     norm=mpl.colors.LogNorm(vmin=0.01, vmax=100),
                                     origin='lower')
                figExIm.patch.set_facecolor('white')
                axExIm3.set_xlabel('x [cells]', family='serif')
                axExIm3.set_ylabel('z [layers]', family='serif')
                figExIm.colorbar(im3)
                #experiment.log_metric("L_aux", aux_errG, step=iteration)
                experiment.log_figure(figure=plt, figure_name="x-z")

                ## E-sum monitoring

                figEsum = plt.figure(figsize=(6, 6 * 0.77 / 0.67))
                axEsum = figEsum.add_subplot(1, 1, 1)
                etot_real = getTotE(real_data.cpu().detach().numpy(),
                                    xbins=13,
                                    ybins=13)
                etot_fake = getTotE(image, xbins=13, ybins=13)
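                # getTotE is defined elsewhere in this example; given xbins=13
                # and ybins=13 it presumably sums all cell energies per shower
                # into one total per event for the histograms below.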

                axEsumReal = axEsum.hist(etot_real,
                                         bins=25,
                                         range=[0, 1500],
                                         weights=np.ones_like(etot_real) /
                                         (float(len(etot_real))),
                                         label="orig",
                                         color='blue',
                                         histtype='stepfilled')

                axEsumFake = axEsum.hist(etot_fake,
                                         bins=25,
                                         range=[0, 1500],
                                         weights=np.ones_like(etot_fake) /
                                         (float(len(etot_fake))),
                                         label="generated",
                                         color='red',
                                         histtype='stepfilled')

                axEsum.text(0.25,
                            0.81,
                            "WGAN",
                            horizontalalignment='left',
                            verticalalignment='top',
                            transform=axEsum.transAxes,
                            color='red')
                axEsum.text(0.25,
                            0.87,
                            'GEANT 4',
                            horizontalalignment='left',
                            verticalalignment='top',
                            transform=axEsum.transAxes,
                            color='blue')

                experiment.log_figure(figure=plt, figure_name="E-sum")

        #end = timer()
        #print(f'---train G elapsed time: {end - start}')

        if params["train_postP"]:
            #---------------------TRAIN P------------------------
            for p in aD.parameters():
                p.requires_grad_(False)  # freeze D

            for c in aG.parameters():
                c.requires_grad_(False)  # freeze G

            lossP = None
            for i in range(1):

                noise = np.random.uniform(-1, 1, (BATCH_SIZE, LATENT))
                noise = torch.from_numpy(noise).float()
                noise = noise.view(
                    -1, LATENT, 1, 1,
                    1)  # [BS, nz] --> [BS, nz, 1, 1, 1], needed by the generator
                noise = noise.to(device)

                batch = next(dataiter, None)

                if batch is None:
                    dataiter = iter(dataloader)
                    batch = next(dataiter)

                real_label = batch['energy']  ## energy label
                real_label = real_label.to(device)

                noise.requires_grad_(True)

                real_data = batch['shower']  # calo image
                real_data = real_data.to(device)

                ## forward pass to generator
                fake_data = aG(noise, real_label.float())
                fake_data = fake_data.unsqueeze(
                    1)  ## transform to [BS, 1, layer, size, size]

                ### first LossD_P
                fake_dataP = aP(fake_data.float(), real_label.float())
                lossD_P = aD(fake_dataP.float(), real_label.float())
                lossD_P = lossD_P.mean()

                ## lossFixP

                real_sorted = real_data.view(BATCH_SIZE, -1)
                fake_sorted = fake_dataP.view(BATCH_SIZE, -1)

                real_sorted, _ = torch.sort(real_sorted, dim=1, descending=True)
                fake_sorted, _ = torch.sort(fake_sorted, dim=1, descending=True)

                lossFixPp1 = mmd_hit_sortKernel(real_sorted.float(),
                                                fake_sorted,
                                                kernel_size=100,
                                                stride=50,
                                                cutoff=2000,
                                                alpha=200)

                lossFixPp2 = F.mse_loss(fake_dataP.view(BATCH_SIZE, -1),
                                        fake_data.detach().view(
                                            BATCH_SIZE, -1),
                                        reduction='mean')

                lossFixP = wMMD * lossFixPp1 + wMSE * lossFixPp2

                lossP = LDP * lossD_P - lossFixP

                lossP.backward(mone)
                optimizer_p.step()

        if iteration % 100 == 0 or iteration == 1:
            print('iteration: {}, critic loss: {}'.format(
                iteration,
                disc_cost.cpu().data.numpy()))
            if rank == 0:
                torch.save(
                    {
                        'Generator': aG.state_dict(),
                        'Critic': aD.state_dict(),
                        'G_optimizer': optimizer_g.state_dict(),
                        'D_optimizer': optimizer_d.state_dict(),
                        'iteration': iteration
                    }, OUTP + '{0}/wgan_itrs_{1}.pth'.format(EXP, iteration))
                if params["train_calib"]:
                    torch.save(
                        aE.state_dict(),
                        OUTP + '/{0}/netE_itrs_{1}.pth'.format(EXP, iteration))
                if params["train_postP"]:
                    torch.save(
                        aP.state_dict(),
                        OUTP + '{0}/netP_itrs_{1}.pth'.format(EXP, iteration))
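Note: the loop above relies on calc_gradient_penalty, which this listing never defines. Below is a minimal sketch of the standard two-sided WGAN-GP penalty (Gulrajani et al., 2017), written only to match the call site above; the shape handling (giving real showers a channel dimension) and the unused DIM argument are assumptions, not the authors' implementation.

import torch

def calc_gradient_penalty(netD, real_data, fake_data, real_label,
                          batch_size, device, DIM=13):
    # DIM is accepted for interface parity with the call site above; this
    # sketch infers all shapes from the tensors instead.
    if real_data.dim() == fake_data.dim() - 1:
        # Give real showers the same [BS, 1, 48, DIM, DIM] layout as the fake
        # ones (an assumption about how the caller stores them).
        real_data = real_data.unsqueeze(1)

    # One interpolation coefficient per sample, broadcast over the volume.
    alpha = torch.rand(batch_size, 1, 1, 1, 1, device=device)
    interpolates = alpha * real_data + (1 - alpha) * fake_data
    interpolates = interpolates.detach().requires_grad_(True)

    disc_interpolates = netD(interpolates, real_label.float())

    # Gradient of the critic score with respect to the interpolated showers.
    gradients = torch.autograd.grad(outputs=disc_interpolates,
                                    inputs=interpolates,
                                    grad_outputs=torch.ones_like(disc_interpolates),
                                    create_graph=True,
                                    retain_graph=True)[0]
    gradients = gradients.view(batch_size, -1)

    # Two-sided penalty: push the per-sample gradient norm toward 1.
    return ((gradients.norm(2, dim=1) - 1) ** 2).mean()

The caller adds this penalty to the critic loss with weight LAMBD, as in disc_cost above.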
ap, f1_max, precision, recall, f1_max_th, fig_pre_rec, fig_th_pre_rec = precision_recall(
    y_test,
    score_test,
    limit=1000,
    label_anomaly=labels[0])

#fig_score_train = plot_score(score_train, 'train')
#fig_score_val = plot_score(score_val, 'validation')
fig_score_test = plot_score(score_test, 'test', 10, labels=y_test, th=f1_max_th)

fig_cumsum = pca.plot_cumsum()

y_pred, conf_matrix = predict(score_test, f1_max_th, y_test, labels)


experiment.add_tags([data, metric])
parameters = {'var': var, 'pc': pca.pcs_, 'metric': metric}
experiment.log_parameters(parameters)
experiment.log_metric('ap', ap)
experiment.log_metric('f1', f1_max)
experiment.log_metric('precision', precision)
experiment.log_metric('recall', recall)
experiment.log_metric('train_time', pca.time_)
experiment.log_parameter('th_f1', f1_max_th)
experiment.log_figure('cumsum', fig_cumsum)
experiment.log_figure('score_test', fig_score_test)
experiment.log_figure('precision_recall', fig_pre_rec)
experiment.log_figure('th_pre_rec_f1', fig_th_pre_rec)
experiment.log_confusion_matrix(matrix=conf_matrix, labels=labels)

experiment.end()
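The predict helper used above is not shown in this excerpt. A plausible minimal sketch, assuming scores above the F1-optimal threshold are flagged as the anomaly class (labels[0], matching label_anomaly=labels[0] in the precision_recall call); the actual implementation may differ.

import numpy as np
from sklearn.metrics import confusion_matrix

def predict(scores, threshold, y_true, labels):
    # Scores above the F1-optimal threshold are flagged as the anomaly class.
    y_pred = np.where(scores > threshold, labels[0], labels[1])
    conf_matrix = confusion_matrix(y_true, y_pred, labels=labels)
    return y_pred, conf_matrix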
Example #30
        D_G_z2 = output.mean().item()
        # Update G
        optimizerG.step()

        """
        Loss_D - discriminator loss calculated as the sum of losses for the all real and all fake batches (log(D(x))+log(D(G(z)))).
        Loss_G - generator loss calculated as log(D(G(z)))
        D(x) - the average output (across the batch) of the discriminator for the all real batch.
               This should start close to 1 then theoretically converge to 0.5 when G gets better. Think about why this is.
        D(G(z)) - average discriminator outputs for the all fake batch. The first number is before D is updated and the second number
                  is after D is updated. These numbers should start near 0 and converge to 0.5 as G gets better. Think about why this is.
        """

        # Output training stats
        if i % 10 == 0:
            print('step:', steps, ' epoch:', epoch)
            experiment.log_metrics({'Loss_D': errD.item(), 'Loss_G': errG.item(), 'D(x)': D_x, 'D(G(z1))': D_G_z1, 'D(G(z2))': D_G_z2})

        if (steps % 100 == 0) or ((epoch == num_epochs-1) and (i == len(dataloader)-1)):
            fixed_noise = torch.randn(3, nz, 1, 1, device=device)
            with torch.no_grad():
                fake = netG(fixed_noise).detach().cpu().numpy()

                plot = plt.figure(figsize=(20, 10))
                for m in range(3):
                    plt.subplot(1, 3, m + 1)
                    plt.imshow((fake[m].transpose((1, 2, 0))+1)/2)
                experiment.log_figure(figure_name='epoch_{}_{}'.format(epoch, steps))
                plt.close()

        steps += 1
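This snippet references errD, errG, D_x, D_G_z1 and D_G_z2 without showing how they are computed. A condensed sketch of the standard DCGAN update that produces those quantities, following the PyTorch DCGAN tutorial the docstring quotes; netD, netG and the two optimizers are assumed to be defined elsewhere.

import torch
import torch.nn as nn

criterion = nn.BCELoss()
real_label_val, fake_label_val = 1., 0.

def gan_step(netD, netG, optimizerD, optimizerG, real_batch, nz, device):
    # --- Update D: maximize log(D(x)) + log(1 - D(G(z))) ---
    netD.zero_grad()
    output = netD(real_batch).view(-1)
    errD_real = criterion(output, torch.full_like(output, real_label_val))
    errD_real.backward()
    D_x = output.mean().item()                    # D(x)

    noise = torch.randn(real_batch.size(0), nz, 1, 1, device=device)
    fake = netG(noise)
    output = netD(fake.detach()).view(-1)
    errD_fake = criterion(output, torch.full_like(output, fake_label_val))
    errD_fake.backward()
    D_G_z1 = output.mean().item()                 # D(G(z)) before the D step
    errD = errD_real + errD_fake
    optimizerD.step()

    # --- Update G: maximize log(D(G(z))) ---
    netG.zero_grad()
    output = netD(fake).view(-1)
    errG = criterion(output, torch.full_like(output, real_label_val))
    errG.backward()
    D_G_z2 = output.mean().item()                 # D(G(z)) after the D step
    optimizerG.step()

    return errD, errG, D_x, D_G_z1, D_G_z2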
Example #31
class ModelTrainer:
    def __init__(self, model, dataloader, args):
        self.model = model
        self.args = args
        self.data = dataloader
        self.metric = args.metric

        if (dataloader is not None):
            self.frq_log = len(dataloader['train']) // args.frq_log

        self.device = torch.device(
            'cuda' if torch.cuda.is_available() else 'cpu')
        model.to(self.device)

        if args.optimizer == 'sgd':
            self.optimizer = optim.SGD(model.parameters(),
                                       lr=args.lr,
                                       momentum=args.momentum,
                                       weight_decay=args.weight_decay)
        elif args.optimizer == 'adam':
            self.optimizer = optim.Adam(model.parameters(),
                                        lr=args.lr,
                                        betas=(args.beta1, 0.999),
                                        weight_decay=args.weight_decay)
        else:
            raise Exception('--optimizer should be one of {sgd, adam}')

        if args.scheduler == 'set':
            self.scheduler = optim.lr_scheduler.LambdaLR(
                self.optimizer,
                lambda epoch: 10**(epoch / args.scheduler_factor))
        elif args.scheduler == 'auto':
            self.scheduler = optim.lr_scheduler.ReduceLROnPlateau(
                self.optimizer,
                mode='min',
                factor=args.scheduler_factor,
                patience=5,
                verbose=True,
                threshold=0.0001,
                threshold_mode='rel',
                cooldown=0,
                min_lr=0,
                eps=1e-08)

        self.experiment = Experiment(api_key=args.comet_key,
                                     project_name=args.comet_project,
                                     workspace=args.comet_workspace,
                                     auto_weight_logging=True,
                                     auto_metric_logging=False,
                                     auto_param_logging=False)

        self.experiment.set_name(args.name)
        self.experiment.log_parameters(vars(args))
        self.experiment.set_model_graph(str(self.model))

    def train_one_epoch(self, epoch):

        self.model.train()
        train_loader = self.data['train']
        train_loss = 0
        correct = 0

        comet_offset = epoch * len(train_loader)

        for batch_idx, (data, target) in tqdm(enumerate(train_loader),
                                              leave=True,
                                              total=len(train_loader)):
            data, target = data.to(self.device), target.to(self.device)

            self.optimizer.zero_grad()
            output = self.model(data)
            loss = F.cross_entropy(output, target, reduction='sum')
            loss.backward()
            self.optimizer.step()

            pred = output.argmax(dim=1, keepdim=True)
            acc = pred.eq(target.view_as(pred)).sum().item()
            train_loss += loss.item()
            correct += acc

            loss = loss.item() / len(data)
            acc = 100. * acc / len(data)

            comet_step = comet_offset + batch_idx
            self.experiment.log_metric('batch_loss', loss, comet_step, epoch)
            self.experiment.log_metric('batch_acc', acc, comet_step, epoch)

            if (batch_idx + 1) % self.frq_log == 0:
                self.experiment.log_metric('log_loss', loss, comet_step, epoch)
                self.experiment.log_metric('log_acc', acc, comet_step, epoch)
                print('Epoch: {} [{}/{}]\tLoss: {:.6f}\tAcc: {:.2f}%'.format(
                    epoch + 1, (batch_idx + 1) * len(data),
                    len(train_loader.dataset), loss, acc))

        train_loss /= len(train_loader.dataset)
        acc = 100. * correct / len(train_loader.dataset)

        comet_step = comet_offset + len(train_loader) - 1
        self.experiment.log_metric('loss', train_loss, comet_step, epoch)
        self.experiment.log_metric('acc', acc, comet_step, epoch)

        print(
            'Epoch: {} [Done]\tLoss: {:.4f}\tAccuracy: {}/{} ({:.2f}%)'.format(
                epoch + 1, train_loss, correct, len(train_loader.dataset),
                acc))

        return {'loss': train_loss, 'acc': acc}

    def train(self):

        self.log_cmd()
        best = -1
        history = {'lr': [], 'train_loss': []}

        try:
            print(">> Training %s" % self.model.name)
            for epoch in range(self.args.nepoch):
                with self.experiment.train():
                    train_res = self.train_one_epoch(epoch)

                with self.experiment.validate():
                    print("\nvalidation...")
                    comet_offset = (epoch + 1) * len(self.data['train']) - 1
                    res = self.val(self.data['val'], comet_offset, epoch)

                if res[self.metric] > best:
                    best = res[self.metric]
                    self.save_weights(epoch)

                if self.args.scheduler == 'set':
                    lr = self.optimizer.param_groups[0]['lr']
                    history['lr'].append(lr)
                    history['train_loss'].append(train_res['loss'])

                    self.scheduler.step(epoch + 1)
                    lr = self.optimizer.param_groups[0]['lr']
                    print('learning rate changed to: %.10f' % lr)

                elif self.args.scheduler == 'auto':
                    self.scheduler.step(train_res['loss'])
        finally:
            print(">> Training model %s. [Stopped]" % self.model.name)
            self.experiment.log_asset_folder(os.path.join(
                self.args.outf, self.args.name, 'weights'),
                                             step=None,
                                             log_file_name=False,
                                             recursive=False)
            if self.args.scheduler == 'set':
                plt.semilogx(history['lr'], history['train_loss'])
                plt.grid(True)
                self.experiment.log_figure(figure=plt)
                plt.show()

    def val(self, val_loader, comet_offset=-1, epoch=-1):
        self.model.eval()
        test_loss = 0
        correct = 0

        labels = list(range(self.args.nclass))
        cm = np.zeros((len(labels), len(labels)))

        with torch.no_grad():
            for data, target in tqdm(val_loader,
                                     leave=True,
                                     total=len(val_loader)):
                data, target = data.to(self.device), target.to(self.device)
                output = self.model(data)
                test_loss += F.cross_entropy(output, target,
                                             reduction='sum').item()
                pred = output.argmax(dim=1, keepdim=True)
                correct += pred.eq(target.view_as(pred)).sum().item()

                pred = pred.view_as(target).data.cpu().numpy()
                target = target.data.cpu().numpy()
                cm += confusion_matrix(target, pred, labels=labels)

        test_loss /= len(val_loader.dataset)
        accuracy = 100. * correct / len(val_loader.dataset)

        print('Evaluation: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)'.
              format(test_loss, correct, len(val_loader.dataset), accuracy))

        res = {'loss': test_loss, 'acc': accuracy}

        self.experiment.log_metrics(res, step=comet_offset, epoch=epoch)
        self.experiment.log_confusion_matrix(
            matrix=cm,
            labels=[ClassDict.getName(x) for x in labels],
            title='confusion matrix after epoch %03d' % epoch,
            file_name="confusion_matrix_%03d.json" % epoch)

        return res

    def test(self):
        self.load_weights()
        with self.experiment.test():
            print('\ntesting....')
            res = self.val(self.data['test'])

    def log_cmd(self):
        d = vars(self.args)
        cmd = '!python main.py \\\n'
        tab = '    '

        for k, v in d.items():
            if v is None or v == '' or (isinstance(v, bool) and v is False):
                continue

            if isinstance(v, bool):
                arg = '--{} \\\n'.format(k)
            else:
                arg = '--{} {} \\\n'.format(k, v)

            cmd = cmd + tab + arg

        # print(cmd);
        self.experiment.log_text(cmd)

    def save_weights(self, epoch: int):

        weight_dir = os.path.join(self.args.outf, self.args.name, 'weights')
        if not os.path.exists(weight_dir):
            os.makedirs(weight_dir)

        torch.save({
            'epoch': epoch,
            'state_dict': self.model.state_dict()
        }, os.path.join(weight_dir, 'model.pth'))

    def load_weights(self):

        path_g = self.args.weights_path

        if path_g is None:
            weight_dir = os.path.join(self.args.outf, self.args.name,
                                      'weights')
            path_g = os.path.join(weight_dir, 'model.pth')

        print('>> Loading weights...')
        weights_g = torch.load(path_g, map_location=self.device)['state_dict']
        self.model.load_state_dict(weights_g)
        print('   Done.')

    def predict(self, x):
        x = x / 2**15  # scale 16-bit PCM samples to [-1, 1]
        self.model.eval()
        with torch.no_grad():
            x = torch.from_numpy(x).float()
            x = self.transform(x)  # feature transform; assumed to be assigned elsewhere
            x = x.unsqueeze(0)
            x = self.model(x)
            x = F.softmax(x, dim=1)
            x = x.numpy()
        return x
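For completeness, a hypothetical wiring of ModelTrainer; the Namespace fields mirror the attributes the class reads, but the real main.py and its defaults are not shown in this excerpt.

from argparse import Namespace

# All field names below are what ModelTrainer reads; values are placeholders.
args = Namespace(lr=1e-3, optimizer='adam', beta1=0.9, momentum=0.9,
                 weight_decay=0.0, scheduler='auto', scheduler_factor=0.5,
                 metric='acc', nepoch=10, frq_log=10, nclass=10,
                 outf='./runs', name='baseline', weights_path=None,
                 comet_key='...', comet_project='demo', comet_workspace='me')

# model and the three loaders are assumed to be built elsewhere.
dataloader = {'train': train_loader, 'val': val_loader, 'test': test_loader}

trainer = ModelTrainer(model, dataloader, args)
trainer.train()   # per-batch and per-epoch metrics go to Comet ML
trainer.test()    # reloads saved weights and evaluates on the test split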