Example #1
0
def main(arguments):
    """Replay an experiment stored in a JSON file into Neptune.

    The JSON document at ``arguments.filepath`` supplies the experiment
    metadata (``name``, ``description``, ``params``, ``properties``, ``tags``,
    ``upload_source_files``) and three channel mappings (``send_metric``,
    ``send_text``, ``send_image``). Each channel mapping goes from a channel
    name to a dict with parallel ``x`` and ``y`` sequences; every (x, y) pair
    is forwarded to the Neptune function of the same name.

    Args:
        arguments: object exposing ``filepath``, ``neptune_api_token`` and
            ``project_name`` attributes.
    """
    with open(arguments.filepath, 'r') as fp:
        json_exp = json.load(fp)

    neptune.init(project_qualified_name=arguments.project_name,
                 api_token=arguments.neptune_api_token)

    metadata_keys = ('name', 'description', 'params', 'properties', 'tags',
                     'upload_source_files')
    experiment_kwargs = {key: json_exp[key] for key in metadata_keys}

    with neptune.create_experiment(**experiment_kwargs):
        # The JSON section names double as the names of the Neptune senders.
        for sender_name in ('send_metric', 'send_text', 'send_image'):
            send = getattr(neptune, sender_name)
            for channel, points in json_exp[sender_name].items():
                for x, y in zip(points['x'], points['y']):
                    send(channel, x=x, y=y)
Example #2
0
 def plot_latent_2d(self, test_encode, test_targets, test_loss):
     """Scatter two latent dimensions, save the plot and log it to Neptune.

     Columns ``self.projone`` and ``self.projtwo`` of ``test_encode`` are
     plotted against each other, coloured by ``10 * test_targets``. The
     figure is written to ``self.imagesdir`` as both PDF and PNG (named
     after ``self.epoch + 1``) and the PNG path is sent to the
     ``plot_latent_2d`` Neptune channel at ``x=self.global_iters``.

     ``test_loss`` is accepted but not used.
     """
     distribution = self.trainer.distribution
     if distribution == 'flower':
         # Rescale before plotting (presumably from [0, 1] to [-1, 1]).
         test_encode = test_encode * 2 - 1.

     plt.figure(figsize=(10, 10))
     plt.scatter(test_encode[:, self.projone],
                 test_encode[:, self.projtwo],
                 c=(10 * test_targets),
                 cmap=plt.cm.Spectral)
     plt.colorbar()

     # Same symmetric window on both axes; wider for 'gaussflower'.
     bound = 6 if self.trainer.distribution == 'gaussflower' else 1.5
     plt.xlim([-bound, bound])
     plt.ylim([-bound, bound])

     # PNG is saved last so that `filename` ends up naming the PNG file.
     for extension in ('pdf', 'png'):
         filename = '{}/test_latent_epoch_{}.{}'.format(
             self.imagesdir, self.epoch + 1, extension)
         plt.savefig(filename)
     plt.close()
     neptune.send_image('plot_latent_2d', x=self.global_iters, y=filename)
Example #3
0
    def _send_image_channels(self):
        """Send side-by-side image pairs from the prediction masks to Neptune.

        ``self.model.eval()`` is called before ``self.get_prediction_masks()``
        and ``self.model.train()`` right after. For every named entry of the
        returned mapping, each item is indexed as ``[0, :, :]`` and
        ``[1, :, :]``; the two planes are pasted into one canvas separated by
        a 10-column gap of zeros, scaled by 255, cast to ``uint8``, resized by
        ``self.image_resize`` and sent to the channel
        ``'<self.model_name> <name>'``.

        The inner loop stops after sending the image whose index equals
        ``self.image_nr``.
        """
        self.model.eval()
        pred_masks = self.get_prediction_masks()
        self.model.train()

        # `Image.ANTIALIAS` was removed in Pillow 10; `Image.LANCZOS` is the
        # same filter under its current name. Fall back for very old Pillow.
        if hasattr(Image, 'LANCZOS'):
            resample = Image.LANCZOS
        else:
            resample = Image.ANTIALIAS

        for name, pred_mask in pred_masks.items():
            for i, image_duplet in enumerate(pred_mask):
                h, w = image_duplet.shape[1:]
                image_glued = np.zeros((h, 2 * w + 10))

                image_glued[:, :w] = image_duplet[0, :, :]
                image_glued[:, (w + 10):] = image_duplet[1, :, :]

                # presumably the values lie in [0, 1] - TODO confirm
                pill_image = Image.fromarray(
                    (image_glued * 255.).astype(np.uint8))
                h_, w_ = image_glued.shape
                pill_image = pill_image.resize(
                    (int(self.image_resize * w_), int(self.image_resize * h_)),
                    resample)

                neptune.send_image('{} {}'.format(self.model_name, name),
                                   pill_image)

                if i == self.image_nr:
                    break
Example #4
0
def save_scatter(x, y, filedir, name, it, other_extensions=('pdf',)):
    """Save a scatter plot of ``x`` vs ``y`` and log the PNG to Neptune.

    The figure is written as ``<filedir>/<name>_epoch_<it+1>.<ext>`` for
    ``png`` and for every extension in ``other_extensions``, then closed.
    The PNG path is sent to the Neptune image channel ``name`` at ``x=it``.

    Args:
        x, y: coordinates passed to ``plt.scatter``.
        filedir: directory prefix for the output files.
        name: file-name stem and Neptune channel name.
        it: iteration index; the file name uses ``it + 1``.
        other_extensions: iterable of additional file extensions to save
            besides ``png``. The default is an immutable tuple so it cannot
            be shared and mutated between calls.
    """
    filename = '{}/{}_epoch_{}.'.format(filedir, name, it+1)
    # list() accepts any iterable (list, tuple, ...), not only lists.
    extensions = ['png'] + list(other_extensions)
    plt.scatter(x, y, s=1)
    for ext in extensions:
        plt.savefig(filename+'{}'.format(ext))
    plt.close()
    neptune.send_image(name, x=it, y=filename+'png')
Example #5
0
def prepare_plots3(target_vals, epoch, METRICSPATH):
    """Log a confusion matrix and a per-class ranking plot for one epoch.

    Each item of ``target_vals`` is unpacked into four parts (classes,
    rankings, predictions, predicted rankings); the parts are concatenated
    across items. The confusion matrix is saved to
    ``<METRICSPATH>/confusion-<epoch>.png`` and sent to the ``conf_mat``
    Neptune channel. A Bokeh plot of target vs predicted ranking is saved to
    ``<METRICSPATH>/ranking_<epoch>.html`` and a screenshot of it is sent to
    the ``rank_distances`` channel.

    Relies on the module-level ``class_names`` and ``num_of_classes``.
    """
    class_parts, ranking_parts, pred_parts, pred_rank_parts = [], [], [], []
    for class_part, ranking_part, pred_part, pred_rank_part in target_vals:
        class_parts.append(class_part)
        ranking_parts.append(ranking_part)
        pred_parts.append(pred_part)
        pred_rank_parts.append(pred_rank_part)

    classes = np.squeeze(np.concatenate(class_parts))
    rankings = np.squeeze(np.concatenate(ranking_parts))
    predictions = np.concatenate(
        [softmax(part, axis=0) for part in pred_parts])
    pred_rank = np.concatenate(pred_rank_parts)

    # --- confusion matrix ---
    predicted_classes = np.argmax(predictions, axis=1)
    conf_mat = confusion_matrix(classes, predicted_classes)
    fig = plt.figure(figsize=[10, 8])
    plot_confusion_matrix(conf_mat,
                          classes=class_names,
                          normalize=False,
                          title=f'Confusion matrix epoch {epoch}')
    cfm_file = f"{METRICSPATH}/confusion-{epoch}.png"
    plt.savefig(cfm_file, format="png")
    neptune.send_image('conf_mat', fig2pil(fig))

    # --- ranking plot ---
    frame = pd.DataFrame(
        data={'tar': rankings, 'pred': pred_rank, 'class': classes})
    palette = magma(num_of_classes + 1)
    p = figure(plot_width=600,
               plot_height=800,
               title=f"Ranking by exercise, epoch {epoch}")
    p.xgrid.grid_line_color = None
    p.xaxis.axis_label = 'Target ranking'
    p.yaxis.axis_label = 'Predicted ranking'

    for cl in range(num_of_classes):
        # Class index 6 is left out of the plot
        # (presumably the 'other' class - TODO confirm).
        if cl != 6:
            class_rows = frame.loc[frame['class'] == cl]
            shared_style = dict(color=palette[cl], legend=class_names[cl])
            p.circle(x=jitter('tar', 0.3),
                     y='pred',
                     size=8,
                     alpha=0.1,
                     source=class_rows,
                     **shared_style)
            p.line(x='tar',
                   y='pred',
                   line_width=2,
                   alpha=0.5,
                   source=class_rows.groupby(by="tar").mean(),
                   **shared_style)
    p.legend.location = "top_left"
    p.legend.click_policy = "hide"

    bok_file = f"{METRICSPATH}/ranking_{epoch}.html"
    output_file(bok_file, title="Ranking by exercise")
    save(p)
    neptune.send_image('rank_distances', get_screenshot_as_png(p))
Example #6
0
def bias_benchmark(task, feature, fraction_range, plot=True):
    """Run the bias experiment on every benchmark dataset and log the results.

    For each dataset the train and test parts are concatenated, wrapped in a
    DataFrame with columns ``f1..fN`` and passed to ``bias_experiment``. When
    ``plot`` is true the chart is drawn to
    ``./logs/<task>_<dataset>_<feature>.png`` and sent to a Neptune image
    channel named after the dataset. The raw results are always written to
    ``./logs/<task>_<dataset>_<feature>_results.json`` and logged as a
    Neptune artifact.

    The results file contains a single JSON object. Earlier versions encoded
    the dict twice, so the file held a JSON string that needed a second
    ``json.loads`` to decode.

    Args:
        task: ``'classification'`` selects the classification datasets; any
            other value selects the regression datasets.
        feature: forwarded to ``bias_experiment`` and used in file names.
        fraction_range: forwarded to ``bias_experiment`` and ``plot_bias``.
        plot: whether to draw and log the chart.
    """
    if task == 'classification':
        get_datasets = get_classification_datasets
    else:
        get_datasets = get_regression_datasets

    SEED = 42

    for d in get_datasets():
        X_train, X_test, y_train, y_test, dataset_name = d
        print(dataset_name)
        X = np.concatenate([X_train, X_test], axis=0)
        y = np.concatenate([y_train, y_test], axis=0)

        features = [f'f{i + 1}' for i in range(X.shape[1])]
        df = pd.DataFrame(X, columns=features)
        image_path = f'./logs/{task}_{dataset_name}_{feature}'

        correlations, rf_scores, sf_scores, permutation_importances = bias_experiment(
            df, y, task, feature, fraction_range, SEED)
        if plot:
            plot_bias(fraction_range, correlations, rf_scores, sf_scores,
                      permutation_importances, dataset_name,
                      image_path + '.png')

            # Log chart and raw results into Neptune
            neptune.send_image(f'{dataset_name}', image_path + '.png')

        results_dict = {
            'correlations': correlations.tolist(),
            'rf_scores': rf_scores.tolist(),
            'sf_scores': sf_scores.tolist(),
            'permutation_importances': permutation_importances
        }
        result_file_path = f'./logs/{task}_{dataset_name}_{feature}_results.json'
        with open(result_file_path, 'w+') as f:
            # Serialize the dict exactly once: dumping an already-dumped
            # string would store a quoted, escaped string instead of an object.
            json.dump(results_dict, f)
            print(f'Saved results to: {result_file_path}')
        neptune.log_artifact(result_file_path)
Example #7
0
    def _send_image_channels(self):
        """Send side-by-side image triplets to Neptune.

        ``self.model.eval()`` is called before ``self._get_image_triplets()``
        and ``self.model.train()`` afterwards. When ``self.image_nr`` is not
        ``None`` only the first ``self.image_nr`` triplets are kept. The three
        planes of each triplet (``[0]``, ``[1]``, ``[2]``) are pasted into
        one canvas with 10-column gaps of zeros between them, scaled by 255,
        cast to ``uint8``, resized by ``self.image_resize`` and sent to the
        channel ``'<self.model_name> predictions'``.
        """
        self.model.eval()
        image_triplets = self._get_image_triplets()
        if self.image_nr is not None:
            image_triplets = image_triplets[:self.image_nr]
        self.model.train()

        # `Image.ANTIALIAS` was removed in Pillow 10; `Image.LANCZOS` is the
        # same filter under its current name. Fall back for very old Pillow.
        if hasattr(Image, 'LANCZOS'):
            resample = Image.LANCZOS
        else:
            resample = Image.ANTIALIAS

        for image_triplet in image_triplets:
            h, w = image_triplet.shape[1:]
            image_glued = np.zeros((h, 3 * w + 20))

            image_glued[:, :w] = image_triplet[0, :, :]
            image_glued[:, (w + 10):(2 * w + 10)] = image_triplet[1, :, :]
            image_glued[:, (2 * w + 20):] = image_triplet[2, :, :]

            # presumably the values lie in [0, 1] - TODO confirm
            pill_image = Image.fromarray((image_glued * 255.).astype(np.uint8))
            h_, w_ = image_glued.shape
            pill_image = pill_image.resize(
                (int(self.image_resize * w_), int(self.image_resize * h_)),
                resample)

            neptune.send_image('{} predictions'.format(self.model_name),
                               pill_image)
Example #8
0
    def handle_files_and_images(self):
        """Exercise Neptune's image and artifact calls in a fixed order.

        Sends one image, uploads the text file as an artifact (once under its
        default name and once under a custom destination) and downloads each
        upload both to the default location and into ``custom_dest``, then
        logs two artifacts under ``dir/``, deletes one of them and finally
        uploads the text file from an open file stream.
        """
        # image
        # `image_name` and `description` will be lost (`send_image` the same as `log_image`)
        image_kwargs = {"name": "name", "description": "desc"}
        neptune.send_image("image", self.img_path, **image_kwargs)

        # artifact with default dest, then artifact with custom dest
        uploads = (
            ({}, "text.txt", "custom_dest/text.txt"),
            ({"destination": "something.txt"}, "something.txt",
             "custom_dest/something.txt"),
        )
        for send_kwargs, artifact_name, custom_dest_path in uploads:
            neptune.send_artifact(self.text_file_path, **send_kwargs)
            exp = neptune.get_experiment()
            with self.with_check_if_file_appears(artifact_name):
                exp.download_artifact(artifact_name)
            with self.with_check_if_file_appears(custom_dest_path):
                exp.download_artifact(artifact_name, "custom_dest")

        # destination dirs
        for destination in ("dir/text file artifact",
                            "dir/artifact_to_delete"):
            neptune.log_artifact(self.text_file_path, destination=destination)

        # deleting
        neptune.delete_artifacts("dir/artifact_to_delete")

        # streams
        with open(self.text_file_path, mode="r") as stream:
            neptune.send_artifact(stream, destination="file stream.txt")
Example #9
0
def box_plot(x, y, cl_ar, bok_file, epoch):
    """Plot predicted vs target ranking per exercise class and log it.

    Builds a Bokeh figure with jittered circles for every sample and a line
    through the per-target mean for each class except index 6 (``'other'``).
    The figure is saved to ``bok_file`` and a screenshot of it is sent to the
    ``rank_distances`` Neptune channel.

    Args:
        x: target rankings (column ``tar``).
        y: predicted rankings (column ``pred``).
        cl_ar: class index of every sample (column ``class``).
        bok_file: path of the HTML file written by Bokeh.
        epoch: epoch number shown in the plot title.
    """
    class_names = ['squat', 'deadlift', 'pushups', 'pullups', 'wallpushups', 'lunges', 'other', 'cleanandjerk']
    palette = magma(len(class_names) + 1)
    frame = pd.DataFrame(data={'tar': x, 'pred': y, 'class': cl_ar})

    p = figure(plot_width=600,
               plot_height=800,
               title=f"Ranking by exercise, epoch {epoch}")
    p.xgrid.grid_line_color = None
    p.xaxis.axis_label = 'Target ranking'
    p.yaxis.axis_label = 'Predicted ranking'

    for cl, class_label in enumerate(class_names):
        if cl == 6:
            # 'other' is not plotted
            continue
        class_rows = frame.loc[frame['class'] == cl]
        class_color = palette[cl]
        p.circle(x=jitter('tar', 0.5),
                 y='pred',
                 size=8,
                 alpha=0.1,
                 color=class_color,
                 legend=class_label,
                 source=class_rows)
        # Only the circles carry a legend entry; the mean line has none.
        p.line(x='tar',
               y='pred',
               line_width=2,
               alpha=0.5,
               color=class_color,
               source=class_rows.groupby(by="tar").mean())

    p.legend.location = "top_left"
    p.legend.click_policy = "hide"
    output_file(bok_file, title="Ranking by exercise")
    save(p)
    neptune.send_image('rank_distances', get_screenshot_as_png(p))
    def run_training_games_multi_process(
            self,
            opponent_to_train,
            baselines,
            epochs,
            n_test_games,
            mcts_passes,
            exploration_ceofficient,
            experiment_name: str = 'MCTS value training',
            weights_path=None,
            confidence_threshold: float = 0.1,
            confidence_limit: int = 2,
            count_ratio: float = 6,
            replay_buffer_n_games: int = 10,
            neural_network_train_epochs: int = 2,
            reset_network: bool = True,
            create_visualizer: bool = True,
            use_neptune: bool = True,
            tags=None,
            source_files=None):
        """Train the value network from MCTS games played across MPI processes.

        Every epoch optionally evaluates the MCTS agent against each baseline,
        plays ``comm_size`` training duels against the opponent, gathers the
        tree data of all processes on the root, and (on the main process)
        adds it to the replay buffer, trains the model and dumps the weights
        to ``weights_path + 'epoch_<idx>.h5'``. Other processes then load the
        weights from that same path.

        Relies on the module-level ``main_process``, ``comm_size`` and
        ``mpi_communicator``.

        Args:
            opponent_to_train: an agent, or the string ``'self'`` for
                self-play (a second MCTS agent using
                ``self.opponent_value_policy`` is created).
            baselines: agents the MCTS agent is evaluated against.
            epochs: number of training epochs.
            n_test_games: games per baseline evaluation; evaluation is
                skipped when this is not positive.
            mcts_passes: passed to the MCTS agents; also scales the count
                threshold.
            exploration_ceofficient: passed to the MCTS agents.
            experiment_name: name of the Neptune experiment.
            weights_path: prefix that is concatenated directly with the
                weights file name. The default ``None`` cannot be
                concatenated with a string, so a real prefix must be passed.
            confidence_threshold, confidence_limit: forwarded to the data
                collector.
            count_ratio: ``count_threshold = int(count_ratio * mcts_passes)``.
            replay_buffer_n_games: how many recent games are used for
                training.
            neural_network_train_epochs: forwarded to ``train_on_mcts_data``.
            reset_network: reset the model weights before each fit.
            create_visualizer: forwarded to the trained MCTS agent.
            use_neptune: enable all Neptune logging in this method.
            tags: tags for the Neptune experiment; defaults to
                ``['experiment']``. The argument is never modified.
            source_files: forwarded to ``create_neptune_experiment``.
        """
        # A fresh list per call: a mutable default would be shared between
        # calls and keep growing when 'self-play' is added below.
        if tags is None:
            tags = ['experiment']

        count_threshold = int(count_ratio * mcts_passes)
        if main_process:
            self.params['mcts passes'] = mcts_passes
            self.params['exploration coefficient'] = exploration_ceofficient
            self.params['n test games'] = n_test_games
            self.params['n proc'] = comm_size
            self.params['replay buffer games'] = replay_buffer_n_games
            self.params[
                'opponent name'] = opponent_to_train.name if opponent_to_train != 'self' else 'self-play'
            self.params['train_epochs'] = neural_network_train_epochs
            self.params['count threshold'] = count_threshold
            self.parse_params_files()

        self.mcts_agent = SingleMCTSAgent(mcts_passes,
                                          self.value_policy,
                                          exploration_ceofficient,
                                          create_visualizer=create_visualizer,
                                          show_unvisited_nodes=False,
                                          log_to_neptune=(main_process
                                                          and use_neptune))

        if opponent_to_train == 'self':
            self.opponent = SingleMCTSAgent(mcts_passes,
                                            self.opponent_value_policy,
                                            exploration_ceofficient,
                                            create_visualizer=False,
                                            show_unvisited_nodes=False,
                                            log_to_neptune=False)
            self.opponent.name = 'MCTS - opponent'
        else:
            self.opponent = opponent_to_train

        if main_process and use_neptune:
            self.create_neptune_experiment(experiment_name=experiment_name,
                                           source_files=source_files)
            if opponent_to_train == 'self':
                # Build a new list instead of appending in place so the
                # caller's list is left untouched.
                base_tags = [tags] if isinstance(tags, str) else list(tags)
                tags = base_tags + ['self-play']
            neptune.append_tag(tags)

        for epoch_idx in range(epochs):

            if n_test_games > 0:

                for baseline in baselines:
                    results_with_baseline = self.arena.run_many_duels(
                        'deterministic', [self.mcts_agent, baseline],
                        n_games=n_test_games,
                        n_proc_per_agent=1,
                        shuffle=False)

                    if main_process:
                        print(results_with_baseline)
                        _, _, baseline_win_rate, baseline_victory_points = results_with_baseline.return_stats(
                        )
                        # Gated like every other Neptune call here: with
                        # use_neptune=False no experiment was created above.
                        if use_neptune:
                            neptune.send_metric(
                                f'Win rate vs {baseline.name}',
                                x=epoch_idx + 1,
                                y=baseline_win_rate / n_test_games)
                            neptune.send_metric(
                                f'Win points vs {baseline.name}',
                                x=epoch_idx + 1,
                                y=baseline_victory_points / n_test_games)

            if main_process:
                print('============ \n Running MCTS games \n============')
            results = self.arena.run_many_duels(
                'deterministic', [self.mcts_agent, self.opponent],
                n_games=comm_size,
                n_proc_per_agent=1,
                shuffle=False)
            if main_process:
                print(results)
            self.data_collector.setup_root(
                self.mcts_agent.mcts_algorithm.original_root)
            local_data_for_training = self.data_collector.generate_all_tree_data_as_list(
                confidence_threshold, count_threshold, confidence_limit)
            combined_data = mpi_communicator.gather(local_data_for_training,
                                                    root=0)
            if main_process:
                data_from_this_epoch = self.flatten_data(combined_data)
                self.replay_buffer.add_game(data_from_this_epoch)
                data_for_training = self.replay_buffer.data_from_last_games(
                    replay_buffer_n_games)
                _, _, mcts_win_rate, mcts_victory_points = results.return_stats(
                )
                if use_neptune:
                    neptune.log_metric('MCTS train win rate',
                                       x=epoch_idx,
                                       y=mcts_win_rate / comm_size)
                    neptune.log_metric('MCTS train victory points',
                                       x=epoch_idx,
                                       y=mcts_victory_points / comm_size)
                plt.hist(data_for_training['mcts_value'], bins=100)
                plt.savefig('epoch_histogram.png')
                plt.clf()
                img_histogram = Image.open('epoch_histogram.png')
                if use_neptune:
                    neptune.send_image(
                        f'Train set histogram epoch = {epoch_idx}',
                        img_histogram)
                self.data_collector.clean_memory()
                if reset_network:
                    self.reset_weights()
                fit_history = self.model.train_on_mcts_data(
                    data_for_training,
                    train_epochs=neural_network_train_epochs)
                if use_neptune:
                    neptune.send_metric('training set size',
                                        x=epoch_idx,
                                        y=len(data_for_training['mcts_value']))
                    neptune.send_metric('loss',
                                        x=epoch_idx,
                                        y=fit_history.history['loss'][0])
                self.mcts_agent.dump_weights(weights_file=weights_path +
                                             f'epoch_{epoch_idx}.h5')

                # NOTE(review): this broadcast is only reached on the main
                # process, so the other ranks never take part in it and are
                # not synchronized with the weight dump - confirm intent.
                saved = main_process
                weights_saved = mpi_communicator.bcast(saved, root=0)

            if not main_process:
                self.mcts_agent.load_weights(weights_file=weights_path +
                                             f'epoch_{epoch_idx}.h5')
                self.opponent.load_weights(weights_file=weights_path +
                                           f'epoch_{epoch_idx}.h5')

        if main_process and use_neptune:
            neptune.stop()
Example #11
0
 def log_histograms(self, file1, file2):
     """Open two histogram image files and send them to Neptune.

     ``file1`` goes to the ``train set histogram`` channel and ``file2`` to
     the ``val set histogram`` channel, in that order.
     """
     channel_sources = (
         ('train set histogram', file1),
         ('val set histogram', file2),
     )
     for channel_name, image_file in channel_sources:
         neptune.send_image(channel_name, Image.open(image_file))
Example #12
0
    print('Best Params: {}'.format(best_params))

    neptune.send_metric('validation auc', best_auc)
    neptune.set_property('best_params', str(to_named_params(best_params)))

    # log results
    skopt.dump(results, 'artifacts/gp_results.pkl')
    joblib.dump(SPACE, 'artifacts/gp_space.pkl')

    neptune.send_artifact('artifacts/gp_results.pkl')
    neptune.send_artifact('artifacts/gp_space.pkl')

    # log diagnostic plots
    fig, ax = plt.subplots(figsize=(16, 12))
    skopt.plots.plot_convergence(results, ax=ax)
    fig.savefig('plots/gp_convergence.png')

    neptune.send_image('diagnostics', 'plots/gp_convergence.png')

    axes = skopt.plots.plot_evaluations(results)
    fig = axes2fig(axes, figsize=(16, 12))
    fig.savefig('plots/gp_evaluations.png')

    neptune.send_image('diagnostics', 'plots/gp_evaluations.png')

    axes = skopt.plots.plot_objective(results)
    fig = axes2fig(axes, figsize=(16, 12))
    fig.savefig('plots/gp_objective.png')

    neptune.send_image('diagnostics', 'plots/gp_objective.png')
Example #13
0
def prepare_plots2(pairings, target_vals, epoch, METRICSPATH):
    """Log a confusion matrix and a pairwise ranking-distance plot, and dump the pairings.

    ``target_vals`` and ``pairings`` are walked in lockstep. Each target item
    is unpacked into (classes, rankings, predictions, embeddings); each
    pairing item is a sequence of 4-tuples that is transposed into
    (rank1, rank2, distance, class). Everything is concatenated across items.

    Outputs:
        * ``<METRICSPATH>/confusion-<epoch>.png``, also sent to the
          ``conf_mat`` Neptune channel;
        * ``<METRICSPATH>/ranking_<epoch>.html`` (Bokeh), with a screenshot
          sent to the ``rank_distances`` channel;
        * ``<METRICSPATH>/pairings_<epoch>.csv`` with the raw pair data.

    Relies on the module-level ``class_names`` and ``num_of_classes``.
    """
    class_parts, ranking_parts, pred_parts, embed_parts = [], [], [], []
    rank1_parts, rank2_parts, dist_parts, pair_class_parts = [], [], [], []

    for target_item, pairing_item in zip(target_vals, pairings):
        class_part, ranking_part, pred_part, embed_part = target_item
        rank1_part, rank2_part, dist_part, pair_class_part = zip(*pairing_item)

        class_parts.append(class_part)
        ranking_parts.append(ranking_part)
        pred_parts.append(pred_part)
        embed_parts.append(embed_part)

        rank1_parts.append(rank1_part)
        rank2_parts.append(rank2_part)
        dist_parts.append(dist_part)
        pair_class_parts.append(pair_class_part)

    classes = np.squeeze(np.concatenate(class_parts))
    rankings = np.squeeze(np.concatenate(ranking_parts))
    predictions = np.concatenate(
        [softmax(part, axis=0) for part in pred_parts])
    embeds = np.concatenate(embed_parts)
    dists = np.concatenate(dist_parts)
    rank1 = np.concatenate(rank1_parts)
    rank2 = np.concatenate(rank2_parts)
    pair_class = np.concatenate(pair_class_parts)

    # --- confusion matrix ---
    predicted_classes = np.argmax(predictions, axis=1)
    conf_mat = confusion_matrix(classes, predicted_classes)
    fig = plt.figure(figsize=[10, 8])
    plot_confusion_matrix(conf_mat,
                          classes=class_names,
                          normalize=False,
                          title=f'Confusion matrix epoch {epoch}')
    cfm_file = f"{METRICSPATH}/confusion-{epoch}.png"
    plt.savefig(cfm_file, format="png")
    neptune.send_image('conf_mat', fig2pil(fig))

    # --- ranking distances ---
    # Both axes are expressed as "max rank minus distance".
    max_rank = np.max(rankings)
    true_dist = rank1 - rank2
    pair_class = pair_class.astype(int)
    frame = pd.DataFrame(data={
        'tar': max_rank - true_dist,
        'pred': max_rank - dists,
        'class': pair_class,
    })

    palette = magma(num_of_classes + 1)
    p = figure(plot_width=600,
               plot_height=800,
               title=f"Ranking by exercise, epoch {epoch}")
    p.xgrid.grid_line_color = None
    p.xaxis.axis_label = 'Target ranking'
    p.yaxis.axis_label = 'Predicted ranking'

    for cl in range(num_of_classes):
        # Class index 6 is left out of the plot
        # (presumably the 'other' class - TODO confirm).
        if cl != 6:
            class_rows = frame.loc[frame['class'] == cl]
            shared_style = dict(color=palette[cl], legend=class_names[cl])
            p.circle(x=jitter('tar', 0.5),
                     y='pred',
                     size=8,
                     alpha=0.1,
                     source=class_rows,
                     **shared_style)
            p.line(x='tar',
                   y='pred',
                   line_width=2,
                   alpha=0.5,
                   source=class_rows.groupby(by="tar").mean(),
                   **shared_style)
    p.legend.location = "top_left"
    p.legend.click_policy = "hide"

    bok_file = f"{METRICSPATH}/ranking_{epoch}.html"
    output_file(bok_file, title="Ranking by exercise")
    save(p)
    neptune.send_image('rank_distances', get_screenshot_as_png(p))

    # --- raw pair data ---
    pairing_file = f"{METRICSPATH}/pairings_{epoch}.csv"
    pair_frame = pd.DataFrame(data={
        'class': pair_class,
        'rank1': rank1,
        'rank2': rank2,
        'dist': dists,
        'true_dist': true_dist,
    })
    pair_frame.to_csv(pairing_file, index=False)
Example #14
0
def main(args):
    """Train the model split by split, logging metrics and samples to Neptune.

    Setup: creates ``args.save_dir``, writes the arguments to ``params.txt``
    there, optionally redirects ``sys.stdout`` to ``log.txt``, seeds torch
    from the module-level ``seed``, then builds the data loaders, the
    sampler, the model with its replay and past buffers, and the optimizer
    (Adam when ``args.optimizer == "adam"``, SGD otherwise).

    Training: for each of ``args.num_splits`` splits and each of
    ``args.n_epochs`` epochs, every batch accumulates a loss ``L`` from up to
    four terms, each enabled by a positive weight:

    * ``p_x_weight``: ``-(f(x_data).mean() - f(x_sampled).mean())``;
    * ``p_y_given_x_weight``: cross-entropy on the labeled batch;
    * ``p_x_y_weight``: the same energy difference, conditioned on labels;
    * ``p_y_given_x_past_weight``: cross-entropy on samples drawn from the
      past buffer for labels of earlier splits (only when such labels exist).

    Metrics are sent every ``args.print_every`` iterations, sample images
    every 100 iterations, checkpoints every ``args.ckpt_every`` epochs and
    validation/test evaluation every ``args.eval_every`` epochs.

    Raises:
        ZeroDivisionError: deliberately, when ``|L|`` exceeds ``1e8``.
    """
    utils.makedirs(args.save_dir)
    with open(f'{args.save_dir}/params.txt', 'w') as f:
        json.dump(args.__dict__, f)
    if args.print_to_log:
        # stdout stays redirected for the rest of the process
        sys.stdout = open(f'{args.save_dir}/log.txt', 'w')

    t.manual_seed(seed)
    if t.cuda.is_available():
        t.cuda.manual_seed_all(seed)

    # datasets
    dload_train, dload_train_labeled, dload_valid, dload_test, dload_train_splits, dload_train_labeled_splits, labels_in_splits = get_data(
        args)

    device = t.device('cuda' if t.cuda.is_available() else 'cpu')

    sample_q = get_sample_q(args, device)
    # NOTE: `f` is rebound here from the params file handle to the model.
    f, replay_buffer, past_buffer = get_model_and_buffer(
        args, device, sample_q)

    # Integer square root, used as the number of images per grid row.
    sqrt = lambda x: int(t.sqrt(t.Tensor([x])))
    plot = lambda p, x: tv.utils.save_image(
        t.clamp(x, -1, 1), p, normalize=True, nrow=sqrt(x.size(0)))

    # optimizer
    params = f.class_output.parameters() if args.clf_only else f.parameters()
    if args.optimizer == "adam":
        optim = t.optim.Adam(params,
                             lr=args.lr,
                             betas=[.9, .999],
                             weight_decay=args.weight_decay)
    else:
        optim = t.optim.SGD(params,
                            lr=args.lr,
                            momentum=.9,
                            weight_decay=args.weight_decay)

    best_valid_acc = 0.0

    # global_iter counts batches across all splits; cur_iter restarts at
    # every split and drives the learning-rate warm-up.
    global_iter = 0
    for sp in range(args.num_splits):
        print('\nFIT ON {}. SPLIT\n'.format(sp))
        cur_iter = 0

        past_labels = list(np.array(labels_in_splits[:sp]).flat)
        print(past_labels)
        curr_labels = list(np.array(labels_in_splits[sp]).flat)
        print(curr_labels)

        dload_train = dload_train_splits[sp]
        dload_train_labeled = dload_train_labeled_splits[sp]

        if sp > 0:
            dload_train_prev_labeled = dload_train_labeled_splits[sp - 1]

        if args.p_y_given_x_past_weight > 0 and len(past_labels) > 0:
            print("Wokring on the past buffer a little bit..")
            # The samples themselves are discarded; presumably the calls are
            # made for their effect on past_buffer - TODO confirm.
            for j in range(500):
                y_past = t.from_numpy(
                    np.random.choice(past_labels, args.batch_size,
                                     p=None)).to(device)
                x_past = sample_q(f, past_buffer, y=y_past, n_steps=100)

        for epoch in range(args.n_epochs):
            if epoch in args.decay_epochs:
                for param_group in optim.param_groups:
                    new_lr = param_group['lr'] * args.decay_rate
                    param_group['lr'] = new_lr
                print("Decaying lr to {}".format(new_lr))
            for i, (x_p_d, _) in tqdm(enumerate(dload_train)):
                # Linear learning-rate warm-up over the first iterations of
                # the split.
                if cur_iter <= args.warmup_iters:
                    lr = args.lr * cur_iter / float(args.warmup_iters)
                    for param_group in optim.param_groups:
                        param_group['lr'] = lr

                x_p_d = x_p_d.to(device)
                x_lab, y_lab = dload_train_labeled.__next__()
                x_lab, y_lab = x_lab.to(device), y_lab.to(device)

                L = 0.
                if args.p_x_weight > 0:  # maximize log p(x)
                    if args.class_cond_p_x_sample:
                        assert not args.uncond, "can only draw class-conditional samples if EBM is class-cond"
                        y_q = t.from_numpy(
                            np.random.choice(curr_labels,
                                             args.batch_size,
                                             p=None)).to(device)
                        #y_q = t.randint(0, args.n_classes, (args.batch_size,)).to(device)
                        x_q = sample_q(f, replay_buffer, y=y_q)
                    else:
                        x_q = sample_q(f,
                                       replay_buffer)  # sample from log-sumexp

                    fp_all = f(x_p_d)
                    fq_all = f(x_q)
                    fp = fp_all.mean()
                    fq = fq_all.mean()

                    l_p_x = -(fp - fq)
                    if global_iter % args.print_every == 0:
                        print(
                            'P(x) | {}:{:>d} f(x_p_d)={:>14.9f} f(x_q)={:>14.9f} d={:>14.9f}'
                            .format(epoch, i, fp, fq, fp - fq))
                        neptune.send_metric('f(x_p_d)', x=global_iter, y=fp)
                        neptune.send_metric('f(x_q)', x=global_iter, y=fq)
                        neptune.send_metric('f(x_p_d)-f(x_q)',
                                            x=global_iter,
                                            y=fp - fq)

                    L += args.p_x_weight * l_p_x

                if args.p_y_given_x_weight > 0:  # maximize log p(y | x)
                    logits = f.classify(x_lab)
                    l_p_y_given_x = nn.CrossEntropyLoss()(logits, y_lab)
                    if global_iter % args.print_every == 0:
                        acc = (logits.max(1)[1] == y_lab).float().mean()
                        print('P(y|x) {}:{:>d} loss={:>14.9f}, acc={:>14.9f}'.
                              format(epoch, global_iter, l_p_y_given_x.item(),
                                     acc.item()))
                        neptune.send_metric('acc', x=global_iter, y=acc.item())
                        neptune.send_metric('loss_p_y_given_x',
                                            x=global_iter,
                                            y=l_p_y_given_x.item())

                    L += args.p_y_given_x_weight * l_p_y_given_x

                if args.p_x_y_weight > 0:  # maximize log p(x, y)
                    assert not args.uncond, "this objective can only be trained for class-conditional EBM DUUUUUUUUHHHH!!!"
                    x_q_lab = sample_q(f, replay_buffer, y=y_lab)
                    fp, fq = f(x_lab, y_lab).mean(), f(x_q_lab, y_lab).mean()
                    l_p_x_y = -(fp - fq)
                    if global_iter % args.print_every == 0:
                        print(
                            'P(x, y) | {}:{:>d} f(x_p_d)={:>14.9f} f(x_q)={:>14.9f} d={:>14.9f}'
                            .format(epoch, i, fp, fq, fp - fq))
                        neptune.send_metric('f(x_p_d)', x=global_iter, y=fp)
                        neptune.send_metric('f(x_q)', x=global_iter, y=fq)
                        neptune.send_metric('l_p_x_y',
                                            x=global_iter,
                                            y=-(fp - fq))

                    L += args.p_x_y_weight * l_p_x_y

                if args.p_y_given_x_past_weight > 0 and len(past_labels) > 0:
                    y_past = t.from_numpy(
                        np.random.choice(past_labels, args.batch_size,
                                         p=None)).to(device)
                    ##y_past = t.randint(0, args.n_classes, (args.batch_size,)).to(device)
                    x_past = sample_q(f, past_buffer, y=y_past)

                    #x_past, y_past = dload_train_prev_labeled.__next__()
                    x_past, y_past = x_past.to(device), y_past.to(device)

                    logits_past = f.classify(x_past)
                    l_p_y_given_x_past = nn.CrossEntropyLoss()(logits_past,
                                                               y_past)
                    if global_iter % args.print_every == 0:
                        # Accuracy must use the logits of the past samples;
                        # `logits` belongs to the current labeled batch and
                        # does not exist when p_y_given_x_weight is 0.
                        acc_past = (logits_past.max(1)[1] == y_past).float().mean()
                        print(
                            'P(y|x_past) {}:{:>d} loss={:>14.9f}, acc={:>14.9f}'
                            .format(epoch, global_iter,
                                    l_p_y_given_x_past.item(),
                                    acc_past.item()))
                        neptune.send_metric('acc_past',
                                            x=global_iter,
                                            y=acc_past.item())
                        neptune.send_metric('loss_p_y_given_x_past',
                                            x=global_iter,
                                            y=l_p_y_given_x_past.item())

                    L += args.p_y_given_x_past_weight * l_p_y_given_x_past

                # break if the loss diverged...easier for poppa to run experiments this way
                if L.abs().item() > 1e8:
                    print("BAD BOIIIIIIIIII")
                    1 / 0

                optim.zero_grad()
                L.backward()
                optim.step()
                cur_iter += 1
                global_iter += 1

                if global_iter % 100 == 0:
                    if args.plot_uncond:
                        if args.class_cond_p_x_sample:
                            assert not args.uncond, "can only draw class-conditional samples if EBM is class-cond"
                            y_q = t.randint(0, args.n_classes,
                                            (args.batch_size, )).to(device)
                            x_q = sample_q(f, replay_buffer, y=y_q)
                        else:
                            x_q = sample_q(f, replay_buffer)
                        filename_uncond = '{}/x_q_{}_{:>06d}_{:>06d}.png'.format(
                            args.save_dir, epoch, i, global_iter)
                        plot(filename_uncond, x_q)
                        neptune.send_image('samples_uncond',
                                           x=global_iter,
                                           y=filename_uncond)
                    if args.plot_cond:  # generate class-conditional samples
                        # ten copies of every class label, grouped by class
                        y = t.arange(0,
                                     args.n_classes).repeat(10, 1).transpose(
                                         1, 0).contiguous().view(-1).to(device)
                        x_q_y = sample_q(f, replay_buffer, y=y)
                        filename_cond = '{}/x_q_y{}_{:>06d}_{:>06d}.png'.format(
                            args.save_dir, epoch, i, global_iter)
                        plot(filename_cond, x_q_y)
                        neptune.send_image('samples_cond',
                                           x=global_iter,
                                           y=filename_cond)

            if epoch % args.ckpt_every == 0:
                checkpoint(f, replay_buffer, f'ckpt_{epoch}.pt', args, device)

            if epoch % args.eval_every == 0 and (args.p_y_given_x_weight > 0
                                                 or args.p_x_y_weight > 0):
                f.eval()
                with t.no_grad():
                    # validation set
                    correct, loss = eval_classification(f, dload_valid, device)
                    print("Epoch {}: Valid Loss {}, Valid Acc {}".format(
                        epoch, loss, correct))
                    neptune.send_metric('valid_loss', x=global_iter, y=loss)
                    neptune.send_metric('valid_acc', x=global_iter, y=correct)

                    if correct > best_valid_acc:
                        best_valid_acc = correct
                        print("Best Valid!: {}".format(correct))
                        checkpoint(f, replay_buffer, "best_valid_ckpt.pt",
                                   args, device)
                        neptune.send_metric('best_valid',
                                            x=global_iter,
                                            y=best_valid_acc)
                    # test set
                    correct, loss = eval_classification(f, dload_test, device)
                    print("Epoch {}: Test Loss {}, Test Acc {}".format(
                        epoch, loss, correct))
                    neptune.send_metric('test_loss', x=global_iter, y=loss)
                    neptune.send_metric('test_acc', x=global_iter, y=correct)

                f.train()
            checkpoint(f, replay_buffer, "last_ckpt.pt", args, device)
                                   **HPO_PARAMS)
    best_auc = -1.0 * results.fun
    best_params = results.x

    # log metrics
    print('Best Validation AUC: {}'.format(best_auc))
    print('Best Params: {}'.format(best_params))

    neptune.send_metric('validation auc', best_auc)
    neptune.set_property('best_params', str(to_named_params(best_params)))

    # log results
    skopt.dump(results, 'artifacts/random_results.pkl')
    joblib.dump(SPACE, 'artifacts/random_space.pkl')

    neptune.send_artifact('artifacts/random_results.pkl')
    neptune.send_artifact('artifacts/random_space.pkl')

    # log diagnostic plots
    fig, ax = plt.subplots(figsize=(16, 12))
    skopt.plots.plot_convergence(results, ax=ax)
    fig.savefig('plots/random_convergence.png')

    neptune.send_image('diagnostics', 'plots/random_convergence.png')

    axes = skopt.plots.plot_evaluations(results)
    fig = axes2fig(axes, figsize=(16, 12))
    fig.savefig('plots/random_evaluations.png')

    neptune.send_image('diagnostics', 'plots/random_evaluations.png')
Example #16
0
def save_image(x, name, it, filename, normalize=True, x_range=(0., 1.)):
    """Write ``x`` to ``filename`` as an image grid and log it to Neptune.

    Args:
        x: Image data forwarded unchanged to ``vutils.save_image``.
        name: Name of the Neptune image channel to send to.
        it: Value sent as the ``x`` coordinate of the logged image
            (presumably the training iteration -- confirm with callers).
        filename: Path the image is written to; the same path is then
            passed to Neptune as the image to upload.
        normalize: Forwarded to ``vutils.save_image`` as ``normalize``.
        x_range: Forwarded to ``vutils.save_image`` as ``range``.
    """
    # The grid is always laid out with 10 images per row.
    grid_options = {
        'nrow': 10,
        'normalize': normalize,
        'range': x_range,
    }
    vutils.save_image(x, filename, **grid_options)
    neptune.send_image(name, x=it, y=filename)
def _send_predictions(preds, filename):
    """Write ``preds`` as CSV under PREDICTION_DIRPATH and upload it to Neptune.

    Args:
        preds: Object exposing ``to_csv`` (a pandas frame in practice).
        filename: File name (not a full path) for the CSV.
    """
    pred_path = os.path.join(PREDICTION_DIRPATH, filename)
    # `index` is documented as a bool; False omits the row index column.
    preds.to_csv(pred_path, index=False)
    neptune.send_artifact(pred_path)


def _send_diagnostic_figure(fig, filename):
    """Save ``fig`` under REPORTS_DIRPATH, log it to Neptune and close it.

    Args:
        fig: Matplotlib figure that has already been drawn on.
        filename: File name (not a full path) for the saved image.
    """
    plot_path = os.path.join(REPORTS_DIRPATH, filename)
    fig.savefig(plot_path)
    neptune.send_image('diagnostics', plot_path)
    # pyplot keeps every figure it created alive until it is closed; release
    # it once it has been saved and uploaded.
    plt.close(fig)


def main():
    """Train and evaluate the model, logging the whole run to Neptune.

    Reads the train/validation index files and the feature table, joins them
    on ``SK_ID_CURR``, runs ``train_evaluate`` inside a Neptune experiment and
    then logs: train/valid AUC, prediction CSVs, the pickled model (plus an
    optional production package when ``PACKAGE_TO_PROD`` is truthy) and four
    diagnostic plots built from the validation predictions.
    """
    neptune.init(api_token=os.getenv('NEPTUNE_API_TOKEN'),
                 project_qualified_name=os.getenv('NEPTUNE_PROJECT'))

    train_idx = pd.read_csv(TRAIN_IDX_PATH, nrows=NROWS)
    valid_idx = pd.read_csv(VALID_IDX_PATH, nrows=NROWS)
    features = pd.read_csv(FEATURES_PATH, nrows=NROWS)

    train = pd.merge(train_idx, features, on='SK_ID_CURR')
    valid = pd.merge(valid_idx, features, on='SK_ID_CURR')

    # Parameters recorded on the experiment: the boosting-round settings plus
    # every entry of LGBM_PARAMS.
    all_params = {
        'num_boost_round': NUM_BOOST_ROUND,
        'early_stopping_rounds': EARLY_STOPPING_ROUNDS,
        **LGBM_PARAMS
    }

    with neptune.create_experiment(name='model training',
                                   params=all_params,
                                   tags=['lgbm'],
                                   upload_source_files=get_filepaths(),
                                   properties={
                                       'features_path':
                                       FEATURES_PATH,
                                       'features_version':
                                       md5_hash(FEATURES_PATH),
                                       'train_split_version':
                                       md5_hash(TRAIN_IDX_PATH),
                                       'valid_split_version':
                                       md5_hash(VALID_IDX_PATH),
                                   }):
        # NOTE(review): only LGBM_PARAMS (not all_params) is passed to
        # train_evaluate -- confirm the round settings reach it another way.
        results = train_evaluate(train,
                                 valid,
                                 LGBM_PARAMS,
                                 callbacks=[neptune_monitor()])
        train_score, valid_score = results['train_score'], results[
            'valid_score']
        train_preds, valid_preds = results['train_preds'], results[
            'valid_preds']

        neptune.send_metric('train_auc', train_score)
        neptune.send_metric('valid_auc', valid_score)

        _send_predictions(train_preds, 'train_preds.csv')
        _send_predictions(valid_preds, 'valid_preds.csv')

        # Persist the model, then record where it lives and a hash of the
        # written file before uploading it.
        model_path = os.path.join(MODEL_DIRPATH, 'model.pkl')
        joblib.dump(results['model'], model_path)
        neptune.set_property('model_path', model_path)
        neptune.set_property('model_version', md5_hash(model_path))
        neptune.send_artifact(model_path)

        if PACKAGE_TO_PROD:
            saved_path = CreditDefaultClassifier.pack(
                model=results['model']).save(PRODUCTION_DIRPATH)
            neptune.set_property('production_model_path', saved_path)

        # Diagnostic plots, all computed on the validation predictions.
        fig, ax = plt.subplots(figsize=(16, 12))
        # Positive-class scores are thresholded at 0.5 to get hard labels.
        sk_metrics.plot_confusion_matrix(valid_preds['TARGET'],
                                         valid_preds['preds_pos'] > 0.5,
                                         ax=ax)
        _send_diagnostic_figure(fig, 'conf_matrix.png')

        fig, ax = plt.subplots(figsize=(16, 12))
        sk_metrics.plot_roc(valid_preds['TARGET'],
                            valid_preds[['preds_neg', 'preds_pos']],
                            ax=ax)
        _send_diagnostic_figure(fig, 'roc_auc.png')

        fig, ax = plt.subplots(figsize=(16, 12))
        sk_metrics.plot_precision_recall(
            valid_preds['TARGET'],
            valid_preds[['preds_neg', 'preds_pos']],
            ax=ax)
        _send_diagnostic_figure(fig, 'prec_recall.png')

        fig, ax = plt.subplots(figsize=(16, 12))
        plot_prediction_distribution(valid_preds['TARGET'],
                                     valid_preds['preds_pos'],
                                     ax=ax)
        _send_diagnostic_figure(fig, 'preds_dist.png')
Example #18
0
        # NOTE(review): `results` is passed to skopt.dump / skopt.plots below,
        # so it is presumably a skopt optimisation result. The sign flip
        # suggests the objective returned negative AUC so it could be
        # minimised -- confirm against the objective, which is not visible here.
        best_auc = -1.0 * results.fun
        best_params = results.x

        neptune.send_metric('valid_auc', best_auc)
        # Stored as a string property: to_named_params() output is stringified
        # before being attached to the experiment.
        neptune.set_property('best_params', str(to_named_params(best_params)))

        # log results: dump the search result under REPORTS_DIRPATH, then
        # upload that same file as an artifact.
        skopt.dump(results, os.path.join(REPORTS_DIRPATH, 'skopt_results.pkl'))
        neptune.send_artifact(
            os.path.join(REPORTS_DIRPATH, 'skopt_results.pkl'))

        # log diagnostic plots: each one is saved under REPORTS_DIRPATH and
        # sent to the shared 'diagnostics' image channel.
        fig, ax = plt.subplots(figsize=(16, 12))
        skopt.plots.plot_convergence(results, ax=ax)
        fig.savefig(os.path.join(REPORTS_DIRPATH, 'convergence.png'))
        neptune.send_image('diagnostics',
                           os.path.join(REPORTS_DIRPATH, 'convergence.png'))

        # plot_evaluations / plot_objective return axes, not a figure; a blank
        # figure is created and axes2fig (defined elsewhere) is given both.
        axes = skopt.plots.plot_evaluations(results)
        fig = plt.figure(figsize=(16, 12))
        fig = axes2fig(axes, fig)
        fig.savefig(os.path.join(REPORTS_DIRPATH, 'evaluations.png'))
        neptune.send_image('diagnostics',
                           os.path.join(REPORTS_DIRPATH, 'evaluations.png'))

        axes = skopt.plots.plot_objective(results)
        fig = plt.figure(figsize=(16, 12))
        fig = axes2fig(axes, fig)
        fig.savefig(os.path.join(REPORTS_DIRPATH, 'objective.png'))
        neptune.send_image('diagnostics',
                           os.path.join(REPORTS_DIRPATH, 'objective.png'))