Example #1
def train(path):
    name = os.path.splitext(os.path.basename(path))[0]
    print('Processing: ', name)
    features = pd.read_csv(path, index_col=None)
    selected_features_names = [name for name, desc in selected_features]
    features = features[selected_features_names]
    split_idx = 1200
    features = features.drop(['sound.files'], axis=1)
    noise_only_df, df = features.iloc[:split_idx], features.iloc[split_idx:]
    y = df.pop('petrel')
    X = df.values
    y_noise = noise_only_df.pop('petrel')
    X_noise = noise_only_df.values
    X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.25, random_state=42, stratify=y)
    hyperparams = {
        'n_estimators': [100, 300, 500, 1000],
        'learning_rate': [0.1],
        'gamma': [0.0, 0.5],
        'max_depth': [2, 3, 4],
        'min_child_weight': [1, 2],
        'subsample': [1.0, 0.8],
        'reg_alpha': [0.0, 0.1],
        'reg_lambda': [1, 2, 3]
    }
    #
    # hyperparams = {
    #     'n_estimators': [100],
    #     'learning_rate': [0.1],
    #     'gamma': [0.0],
    #     'max_depth': [2],
    #     'min_child_weight': [1],
    #     'subsample': [1.0],
    #     'reg_alpha': [0.0],
    #     'reg_lambda': [1]
    # }

    clf = model_selection.GridSearchCV(estimator=xg.XGBClassifier(objective='binary:logistic', n_jobs=-1),
                                       param_grid=hyperparams,
                                       cv=4)
    fit_params = clf.fit(X_train, y_train)
    estimator = fit_params.best_estimator_
    joblib.dump(estimator, name + '_model.pkl')

    test_pred = estimator.predict(X_test)
    metrics = calculate_metrics(test_pred, y_test)

    noise_pred = estimator.predict(X_noise)
    noise_detection_accuracy = accuracy_score(y_noise, noise_pred)

    experiment = Experiment(api_key="4PdGdUZmGf6P8QsMa5F2zB4Ui",
                            project_name="storm petrels",
                            workspace="tracewsl")
    experiment.set_name(name)
    experiment.log_parameter('name', name)
    experiment.log_multiple_params(fit_params.best_params_)
    experiment.log_multiple_metrics(metrics)
    experiment.log_metric('Noise detection accuracy', noise_detection_accuracy)
    experiment.log_figure('Confusion matrix', get_confusion_matrix_figure(test_pred, y_test))
    experiment.log_figure('Feature importance', get_feature_importance_figure(estimator, list(df.columns.values)))
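
The example above logs the grid-search results with the older comet_ml helpers (log_multiple_params / log_multiple_metrics). For reference, a minimal sketch of the same logging flow using the current SDK method names; the key, workspace, and metric values below are placeholders rather than outputs of this example:

from comet_ml import Experiment

experiment = Experiment(api_key="YOUR_API_KEY",
                        project_name="storm petrels",
                        workspace="your-workspace")
experiment.set_name("xgboost-gridsearch")
experiment.log_parameters({"n_estimators": 300, "max_depth": 3})  # newer name for log_multiple_params
experiment.log_metrics({"accuracy": 0.95, "f1": 0.93})            # newer name for log_multiple_metrics
experiment.log_metric("Noise detection accuracy", 0.97)
experiment.end()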
Example #2
            labels = Variable(labels)

            # Forward + Backward + Optimize
            optimizer.zero_grad()
            outputs = rnn(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Compute train accuracy
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += float((predicted == labels.data).sum())

            # Log accuracy to Comet.ml
            experiment.log_metric("accuracy", 100 * correct / total, step=step)
            step += 1

            if (i + 1) % 100 == 0:
                print('Epoch [%d/%d], Step [%d/%d], Loss: %.4f' %
                      (epoch + 1, hyper_params['num_epochs'], i + 1,
                       len(train_dataset) // hyper_params['batch_size'],
                       loss.data.item()))
with experiment.test():
    # Test the Model
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = Variable(
            images.view(-1, hyper_params['sequence_length'],
                        hyper_params['input_size']))
Example #3
        hprev_val = np.zeros([1, hidden_size])
        p = 0  # reset

    # Prepare inputs
    input_vals = [char_to_ix[ch] for ch in data[p:p + seq_length]]
    target_vals = [char_to_ix[ch] for ch in data[p + 1:p + seq_length + 1]]

    input_vals = one_hot(input_vals)
    target_vals = one_hot(target_vals)

    hprev_val, loss_val, _ = sess.run([hprev, loss, updates],
                                      feed_dict={inputs: input_vals,
                                                 targets: target_vals,
                                                 init_state: hprev_val})
    # log the loss to Comet.ml
    experiment.log_metric("loss", loss_val, step=n)

    if n % 500 == 0:
        # Log Progress

        print('iter: %d, p: %d, loss: %f' % (n, p, loss_val))

        # Do sampling
        sample_length = 200
        start_ix = random.randint(0, len(data) - seq_length)
        sample_seq_ix = [char_to_ix[ch]
                         for ch in data[start_ix:start_ix + seq_length]]
        ixes = []
        sample_prev_state_val = np.copy(hprev_val)

        for t in range(sample_length):
Example #4
def main():

    args = get_args()
    hyperparams = vars(args)

    if not args.no_comet:
        experiment = Experiment(api_key="5yzCYxgDmFnt1fhJWTRQIkETT",
                                log_code=True)
        experiment.log_multiple_params(hyperparams)

    text_field = data.Field(tokenize=custom_tokenizer,
                            fix_length=args.sentence_len,
                            unk_token='<**UNK**>')
    label_field = data.Field(sequential=False, unk_token=None)
    pair_field = data.RawField()

    if args.dataset == 'multinli':
        print('Loading MultiNLI Dataset')
        train = get_dataset(text_field, label_field, pair_field, 'train')
        val = get_dataset(text_field, label_field, pair_field, args.val_set)
    elif args.dataset == 'snli':
        print('Loading SNLI Dataset')
        train, val, test = datasets.SNLI.splits(text_field, label_field)
        del test
    else:
        raise Exception('Incorrect Dataset Specified')

    text_field.build_vocab(train, max_size=args.max_vocab_size)
    label_field.build_vocab(train, val)

    if args.word_vectors:
        text_field.vocab.load_vectors(args.word_vectors)

    device = -1
    if args.cuda:
        device = None

    print('Generating Iterators')
    train_iter, val_iter = data.BucketIterator.splits(
        (train, val),
        batch_size=args.batch_size,
        shuffle=True,
        sort_key=sort_key,
        device=device)
    train_iter.repeat = False

    args.n_embed = len(text_field.vocab)
    args.d_out = len(label_field.vocab)
    args.n_cells = args.n_layers
    if args.bidir:
        args.n_cells *= 2
    print(args)

    if args.load_model:
        model = torch.load(args.load_model)
    else:
        model = MODELS[args.model_type](args)
        print('Loading Word Embeddings')
        model.embed.weight.data = text_field.vocab.vectors

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                        model.parameters()),
                                 lr=args.lr)

    if args.cuda:
        model = model.cuda()
        criterion = criterion.cuda()
    print(model)

    print('Training Model')

    best_val_acc = 0.0
    val_acc_history = []

    for epoch in range(1, args.n_epochs + 1):

        if (args.model_type == 'DA') and (best_val_acc >= args.DA_embed_train):
            model.embed.weight.requires_grad = True

        train_iter.init_epoch()
        for batch_ind, batch in enumerate(train_iter):
            model.train()
            optimizer.zero_grad()
            out = model(batch)
            loss = criterion(out, batch.label)
            loss.backward()
            clip_grad_norm(
                filter(lambda p: p.requires_grad, model.parameters()), 10)
            optimizer.step()

            if (batch_ind != 0) and (batch_ind % args.dev_every == 0):
                val_correct, val_loss = evaluate(val_iter, model, criterion)
                val_accuracy = 100 * val_correct / len(val)

                print('    Batch Step {}/{}, Val Loss: {:.4f}, Val Accuracy: {:.4f}'.\
                            format(batch_ind,
                                   len(train) // args.batch_size,
                                   val_loss,
                                   val_accuracy))

        train_correct, train_loss = evaluate(train_iter, model, criterion)
        val_correct, val_loss = evaluate(val_iter, model, criterion)
        val_accuracy = 100 * val_correct / len(val)
        val_acc_history.append(val_accuracy)

        stop_training = early_stop(val_acc_history)

        if not args.no_comet:
            experiment.log_metric("Train loss", train_loss)
            experiment.log_metric("Val loss", val_loss)
            experiment.log_metric("Accuracy (val)", val_accuracy)
            experiment.log_metric("Accuracy (train)",
                                  100 * train_correct / len(train))

        if args.save_model and (val_accuracy > best_val_acc):
            best_val_acc = val_accuracy
            if best_val_acc > 60:
                snapshot_path = '../saved_models/Model_{}_acc_{:.4f}_epoch_{}_model.pt'.format(
                    args.model_type, val_accuracy, epoch)

                if args.cuda:
                    torch.save(model.cpu(), snapshot_path)
                    model = model.cuda()
                else:
                    torch.save(model, snapshot_path)

        print('Epoch: {}, Train Loss: {:.4f}, Val Loss: {:.4f}, Train Acc: {:.2f}, Val Acc: {:.2f}, Best Val Acc: {:.2f}'.\
                format(epoch,
                       train_loss,
                       val_loss,
                       100 * train_correct / len(train),
                       val_accuracy,
                       best_val_acc))

        if stop_training:
            print('Early stop triggered.')
            break
def train_traditional(hyper_params, teacher, student, sf_teacher, sf_student,
                      trainloader, valloader, args):
    for stage in range(2):
        # Load previous stage model (except zeroth stage)
        if stage != 0:
            hyper_params['stage'] = stage - 1
            student.load_state_dict(
                torch.load(
                    get_savename(hyper_params,
                                 args.dataset,
                                 mode='traditional-stage',
                                 p=args.percentage)))

        # update hyperparams dictionary
        hyper_params['stage'] = stage

        # Freeze all stages except current stage
        student = unfreeze_trad(student, hyper_params['stage'])

        project_name = 'trad-kd-' + hyper_params[
            'dataset'] + '-' + hyper_params['model']
        experiment = Experiment(api_key="1jNZ1sunRoAoI2TyremCNnYLO",
                                project_name=project_name,
                                workspace="semseg_kd")
        experiment.log_parameters(hyper_params)

        optimizer = torch.optim.Adam(student.parameters(),
                                     lr=hyper_params['learning_rate'])
        scheduler = torch.optim.lr_scheduler.OneCycleLR(
            optimizer,
            max_lr=1e-2,
            steps_per_epoch=len(trainloader),
            epochs=hyper_params['num_epochs'])
        criterion = nn.MSELoss()

        savename = get_savename(hyper_params,
                                args.dataset,
                                mode='traditional-stage',
                                p=args.percentage)
        lowest_val_loss = 100
        for epoch in range(hyper_params['num_epochs']):
            student, lowest_val_loss, train_loss, val_loss = train_stage(
                model=student,
                teacher=teacher,
                stage=hyper_params['stage'],
                sf_student=sf_student,
                sf_teacher=sf_teacher,
                train_loader=trainloader,
                val_loader=valloader,
                loss_function=criterion,
                optimiser=optimizer,
                scheduler=scheduler,
                epoch=epoch,
                num_epochs=hyper_params['num_epochs'],
                savename=savename,
                lowest_val=lowest_val_loss,
                args=args)
            experiment.log_metric('train_loss', train_loss)
            experiment.log_metric('val_loss', val_loss)
            print(round(val_loss, 6))

    # Classifier training
    hyper_params['stage'] = 1
    student.load_state_dict(
        torch.load(
            get_savename(hyper_params,
                         args.dataset,
                         mode='traditional-stage',
                         p=args.percentage)))
    hyper_params['stage'] = 2

    # Freeze all stages except current stage
    student = unfreeze_trad(student, hyper_params['stage'])

    project_name = 'trad-kd-' + hyper_params['dataset'] + '-' + hyper_params[
        'model']
    experiment = Experiment(api_key="1jNZ1sunRoAoI2TyremCNnYLO",
                            project_name=project_name,
                            workspace="semseg_kd")
    experiment.log_parameters(hyper_params)

    optimizer = torch.optim.Adam(student.parameters(),
                                 lr=hyper_params['learning_rate'])
    scheduler = torch.optim.lr_scheduler.OneCycleLR(
        optimizer,
        max_lr=1e-2,
        steps_per_epoch=len(trainloader),
        epochs=hyper_params['num_epochs'])
    if hyper_params['dataset'] == 'camvid':
        criterion = nn.CrossEntropyLoss(ignore_index=11)
    else:
        criterion = nn.CrossEntropyLoss(ignore_index=250)
        hyper_params['num_classes'] = 19

    savename = get_savename(hyper_params,
                            args.dataset,
                            mode='traditional-kd',
                            p=args.percentage)
    highest_iou = 0
    for epoch in range(hyper_params['num_epochs']):
        student, highest_iou, train_loss, val_loss, avg_iou, avg_pixel_acc, avg_dice_coeff = train(
            model=student,
            train_loader=trainloader,
            val_loader=valloader,
            num_classes=12,
            loss_function=criterion,
            optimiser=optimizer,
            scheduler=scheduler,
            epoch=epoch,
            num_epochs=hyper_params['num_epochs'],
            savename=savename,
            highest_iou=highest_iou,
            args=args)
        experiment.log_metric('train_loss', train_loss)
        experiment.log_metric('val_loss', val_loss)
        experiment.log_metric('avg_iou', avg_iou)
        experiment.log_metric('avg_pixel_acc', avg_pixel_acc)
        experiment.log_metric('avg_dice_coeff', avg_dice_coeff)
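
A note on the staged loop above: each stage constructs a fresh Experiment without explicitly closing the previous one. A hedged sketch of the same pattern with an explicit end() per stage (identifiers as in the function above; the API key is a placeholder):

for stage in range(2):
    experiment = Experiment(api_key="YOUR_API_KEY",
                            project_name=project_name,
                            workspace="semseg_kd")
    experiment.log_parameters(hyper_params)
    # ... per-epoch training and experiment.log_metric calls ...
    experiment.end()  # close this stage's run before the next one starts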
                                              random_state=42)

    checkpoint_callback = skopt.callbacks.CheckpointSaver(
        f'D:\\FINKI\\8_dps\\Project\\MODELS\\skopt_checkpoints\\{EXPERIMENT_ID}.pkl'
    )
    hyperparameters_optimizer.fit(X_train,
                                  y_train,
                                  callback=[checkpoint_callback])
    skopt.dump(hyperparameters_optimizer, f'saved_models\\{EXPERIMENT_ID}.pkl')

    y_pred = hyperparameters_optimizer.best_estimator_.predict(X_test)

    for i in range(len(hyperparameters_optimizer.cv_results_['params'])):
        exp = Experiment(
            api_key='A8Lg71j9LtIrsv0deBA0DVGcR',
            project_name=ALGORITHM,
            workspace="8_dps",
            auto_output_logging='native',
        )
        exp.set_name(f'{EXPERIMENT_ID}_{i+1}')
        exp.add_tags([
            DS,
            SEGMENTS_LENGTH,
        ])
        for k, v in hyperparameters_optimizer.cv_results_.items():
            if k == "params": exp.log_parameters(dict(v[i]))
            else: exp.log_metric(k, v[i])
        exp.end()

#%%
                # score
                mae_calculator_d1.eval(d1.cpu().detach().numpy(),
                                       d1_label.cpu().detach().numpy())
                mae_calculator_d2.eval(d2.cpu().detach().numpy(),
                                       d2_label.cpu().detach().numpy())
                mae_calculator_d3.eval(d3.cpu().detach().numpy(),
                                       d3_label.cpu().detach().numpy())
                mae_calculator_final.eval(d.cpu().detach().numpy(),
                                          d1_label.cpu().detach().numpy())
            print("count ", mae_calculator_d1.count)
            print("d1_val ", mae_calculator_d1.get_mae())
            print("d2_val ", mae_calculator_d2.get_mae())
            print("d3_val ", mae_calculator_d3.get_mae())
            print("dfinal_val ", mae_calculator_final.get_mae())
            experiment.log_metric("d1_val", mae_calculator_d1.get_mae())
            experiment.log_metric("d2_val", mae_calculator_d2.get_mae())
            experiment.log_metric("d3_val", mae_calculator_d3.get_mae())
            experiment.log_metric("dfinal_val", mae_calculator_final.get_mae())
        exit()

    while current_epoch < TOTAL_EPOCH:
        experiment.log_current_epoch(current_epoch)
        current_epoch += 1
        print("start epoch ", current_epoch)
        loss_sum = 0
        sample = 0
        start_time = time()
        counting = 0
        for train_img, label in train_loader_pacnn:
            net.train()
Example #8
def main(cmd=None, stdout=True):
    args = get_args(cmd, stdout)

    model_id = "seed_{}_strat_{}_noise_fn_{}_noise_fp_{}_num_passes_{}_seed_size_{}_model_{}_batch_size_{}_gamma_{}_label_budget_{}_epochs_{}".format(
        args.seed, args.strategy, args.noise_fn, args.noise_fp, args.num_passes, args.seed_size, args.model, args.batch_size, args.gamma, args.label_budget, args.epochs)

    logging.basicConfig(
        filename="{}/{}.txt".format(args.dout, model_id),
        format='%(asctime)s %(levelname)-8s %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S',
        level=logging.INFO)
    logging.getLogger().addHandler(logging.StreamHandler(sys.stdout))

    logger = Experiment(comet_ml_key, project_name="ActiveDialogue")
    logger.log_parameters(vars(args))

    if args.model == "glad":
        model_arch = GLAD
    elif args.model == "gce":
        model_arch = GCE

    env = PartialEnv(load_dataset, model_arch, args)
    if args.seed_size:
        with logger.train():
            if not env.load('seed'):
                logging.info("No loaded seed. Training now.")
                env.seed_fit(args.seed_epochs, prefix="seed")
                logging.info("Seed completed.")
            else:
                logging.info("Loaded seed.")
                if args.force_seed:
                    logging.info("Training seed regardless.")
                    env.seed_fit(args.seed_epochs, prefix="seed")
        env.load('seed')

    use_strategy = False
    if args.strategy == "entropy":
        use_strategy = True
        strategy = partial_entropy
    elif args.strategy == "bald":
        use_strategy = True
        strategy = partial_bald

    if use_strategy:
        if args.threshold_strategy == "fixed":
            strategy = FixedThresholdStrategy(strategy, args, True)
        elif args.threshold_strategy == "variable":
            strategy = VariableThresholdStrategy(strategy, args, True)
        elif args.threshold_strategy == "randomvariable":
            strategy = StochasticVariableThresholdStrategy(
                strategy, args, True)

    ended = False
    i = 0

    initial_metrics = env.metrics(True)
    logger.log_current_epoch(i)
    logging.info("Initial metrics: {}".format(initial_metrics))
    for k, v in initial_metrics.items():
        logger.log_metric(k, v)

    with logger.train():
        while not ended:
            i += 1

            # Observe environment state
            logger.log_current_epoch(i)

            if env.can_label:
                # Obtain label request from strategy
                obs, preds = env.observe(20 if args.strategy ==
                                         "bald" else 1)
                if args.strategy != "bald":
                    preds = preds[0]
                if args.strategy == "aggressive":
                    label_request = aggressive(preds)
                elif args.strategy == "random":
                    label_request = random(preds)
                elif args.strategy == "passive":
                    label_request = passive(preds)
                elif use_strategy:
                    label_request = strategy.observe(preds)
                else:
                    raise ValueError()

                # Label solicitation
                labeled = env.label(label_request)
                if use_strategy:
                    strategy.update(
                        sum([
                            np.sum(s.flatten())
                            for s in label_request.values()
                        ]),
                        sum([
                            np.sum(np.ones_like(s).flatten())
                            for s in label_request.values()
                        ]))
            else:
                break

            # Environment stepping
            ended = env.step()
            # Fit every al_batch of items
            best = env.fit(prefix=model_id, reset_model=True)
            for k, v in best.items():
                logger.log_metric(k, v)
            env.load(prefix=model_id)

    # Final fit
    final_metrics = env.fit(epochs=args.final_epochs,
                            prefix="final_fit_" + model_id,
                            reset_model=True)
    for k, v in final_metrics.items():
        logger.log_metric("Final " + k, v)
        logging.info("Final " + k + ": " + str(v))
    logging.info("Run finished.")
Example #9
def main():
    experiment = Experiment(api_key="1x1ZQpvbtvDyO2s5DrlUyYpzv",
                            project_name="GAN1",
                            workspace="verlyn-fischer")

    discriminator_path = 'models/discriminator.pth'
    generator_path = 'models/generator.pth'

    # Load data
    data = mnist_data()
    # Create loader with data, so that we can iterate over it
    data_loader = torch.utils.data.DataLoader(data,
                                              batch_size=100,
                                              shuffle=True)
    # Num batches
    num_batches = len(data_loader)
    discriminator = DiscriminatorNet()
    generator = GeneratorNet()

    d_optimizer = optim.Adam(discriminator.parameters(), lr=0.0002)
    g_optimizer = optim.Adam(generator.parameters(), lr=0.0002)

    loss = nn.BCELoss()

    num_test_samples = 16
    test_noise = noise(num_test_samples)

    # Create logger instance
    # logger = Logger(model_name='VGAN', data_name='MNIST')
    # Total number of epochs to train
    num_epochs = 70
    for epoch in range(num_epochs):
        print(f'Epoch: {epoch}')
        for n_batch, (real_batch, _) in enumerate(data_loader):
            N = real_batch.size(0)
            # 1. Train Discriminator
            real_data = Variable(images_to_vectors(real_batch))
            # Generate fake data and detach
            # (so gradients are not calculated for generator)
            fake_data = generator(noise(N)).detach()
            # Train D
            d_error, d_pred_real, d_pred_fake = \
                train_discriminator(d_optimizer, real_data, fake_data, discriminator, loss)

            # 2. Train Generator
            # Generate fake data
            fake_data = generator(noise(N))
            # Train G
            g_error = train_generator(g_optimizer, fake_data, discriminator,
                                      loss)
            # Log batch error
            # logger.log(d_error, g_error, epoch, n_batch, num_batches)
            # Display Progress every few batches
            experiment.log_metric("d_error", d_error, step=n_batch)
            experiment.log_metric("g_error", g_error, step=n_batch)
            # if (n_batch) % 100 == 0:
            #     # logger.log_images(
            #     #     test_images, num_test_samples,
            #     #     epoch, n_batch, num_batches
            #     # );
            #     # Display status Logs
            #     # logger.display_status(
            #     #     epoch, num_epochs, n_batch, num_batches,
            #     #     d_error, g_error, d_pred_real, d_pred_fake
            #     # )

        # Plot test images after each epoch
        test_images = vectors_to_images(generator(test_noise))
        test_images = test_images.data
        plot_test_images(test_images, experiment, False)

    # Save models and log images

    torch.save(discriminator.state_dict(), discriminator_path)
    torch.save(generator.state_dict(), generator_path)

    test_images = vectors_to_images(generator(test_noise))
    test_images = test_images.data
    plot_test_images(test_images, experiment, True)
Example #10
class CometMLMonitor(MonitorBase):
    """
    Send scalar data and the graph to https://www.comet.ml.

    Note:
        1. comet_ml requires you to `import comet_ml` before importing tensorflow or tensorpack.
        2. The "automatic output logging" feature of comet_ml will make the training progress bar appear to freeze.
           Therefore the feature is disabled by default.
    """
    def __init__(self, experiment=None, tags=None, **kwargs):
        """
        Args:
            experiment (comet_ml.Experiment): if provided, all other arguments must be left unset
            tags (list[str]): experiment tags
            kwargs: arguments used to initialize :class:`comet_ml.Experiment`,
                such as project name, API key, etc.
                Refer to its documentation for details.
        """
        if experiment is not None:
            self._exp = experiment
            assert tags is None and len(kwargs) == 0
        else:
            from comet_ml import Experiment
            kwargs.setdefault(
                'log_code', True
            )  # though it's not functioning, git patch logging requires it
            kwargs.setdefault('auto_output_logging', None)
            self._exp = Experiment(**kwargs)
            if tags is not None:
                self._exp.add_tags(tags)

        self._exp.set_code("Code logging is impossible ...")
        self._exp.log_dependency('tensorpack', __git_version__)

    @property
    def experiment(self):
        """
        The :class:`comet_ml.Experiment` instance.
        """
        return self._exp

    def _before_train(self):
        self._exp.set_model_graph(tf.get_default_graph())

    @HIDE_DOC
    def process_scalar(self, name, val):
        self._exp.log_metric(name, val, step=self.global_step)

    @HIDE_DOC
    def process_image(self, name, val):
        self._exp.set_step(self.global_step)
        for idx, v in enumerate(val):
            log_name = "{}_step{}{}".format(
                name, self.global_step, "_" + str(idx) if len(val) > 1 else "")

            self._exp.log_image(v,
                                image_format="jpeg",
                                name=log_name,
                                image_minmax=(0, 255))

    def _after_train(self):
        self._exp.end()

    def _after_epoch(self):
        self._exp.log_epoch_end(self.epoch_num)
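
A minimal usage sketch for the monitor defined above, assuming it is constructed directly from this module; it also illustrates the import-order note from the class docstring. The api_key and project_name values are placeholders:

import comet_ml          # must be imported before tensorflow / tensorpack
import tensorflow as tf

# Remaining keyword arguments are forwarded to comet_ml.Experiment
monitor = CometMLMonitor(tags=["baseline"],
                         api_key="YOUR_API_KEY",
                         project_name="my-project")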
            groups = data_reader.groups
            all_scores = []

            for i in range(3):
                ae = Autoencoder(config[i]["encoder"],
                                 config[i]["decoder"],
                                 input_shape=input_shapes[i],
                                 latent_shape=latent_shape,
                                 loss="mean_squared_error",
                                 optimizer_params=None)

                experiment.log_multiple_params(config[i])

                scores = ae.cross_validate(data[i],
                                           groups,
                                           experiment=experiment,
                                           epochs=10000,
                                           n_splits=4,
                                           log_prefix=f"dataset_{i}_")

                all_scores.append(scores)

                mean_scores = np.mean(scores)

                experiment.log_metric(f"mean_scores_{i}", mean_scores)

                experiment.log_other(f"scores_{i}", scores)

            experiment.log_metric(f"mean_all_scores", np.mean(all_scores))
            print(all_scores)
Example #12
experiment.add_tag("vanilla-resnet")


from torch import nn, optim
from tqdm import trange

from film_test.resnet import resnet18
from film_test.traintest import train, test, device

EPOCHS = 24

net = resnet18(num_classes=2)
net = net.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)

trainloader, testloader = qa_cifar()

for epoch in trange(EPOCHS):
    experiment.log_metric("epoch", epoch)
    train(
        net,
        trainloader,
        epoch,
        optimizer,
        criterion,
        qa=True,
        comet=experiment)
    test(net, testloader, criterion, qa=True, comet=experiment)
Example #13
    net = ResNet_32(BasicBlock, [7, 7, 7], num_classes=10).cuda()
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.SGD(net.parameters(),
                                lr=0.1,
                                momentum=0.9,
                                weight_decay=1e-4)
    epoch = 185
    batch_size = 128
    num_workers = 16

    train_loader = get_training_dataloader(batch_size, num_workers)
    test_loader = get_test_dataloader(batch_size, num_workers)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [95, 140],
                                                     gamma=0.1)
    start_time = time.time()
    step = 0
    best_acc = 0.0
    for i in range(1, epoch + 1):
        step = train(net, i, train_loader, optimizer, criterion, step)
        t_acc = eval_training(net, i, test_loader, step)
        if t_acc > best_acc:
            best_acc = t_acc
        scheduler.step()
        experiment.log_metric('lr',
                              value=optimizer.param_groups[0]['lr'],
                              step=i)

    end_time = time.time()
    print("time cost: %.4f min" % (float(end_time - start_time) / 60))
    print("best test_set acc : %.4f" % best_acc)
def train_net(net):

    path = net.path
    hidden_sizes = net.hyperparameters["hidden_sizes"]
    n_epochs = net.hyperparameters["n_epochs"]
    batch_size = net.hyperparameters["batch_size"]
    n_it_neg = net.hyperparameters["n_it_neg"]  # n_iterations in free phase
    n_it_pos = net.hyperparameters[
        "n_it_pos"]  # n_iterations in weakly clamped phase
    epsilon = net.hyperparameters["epsilon"]
    beta = net.hyperparameters["beta"]
    alphas = net.hyperparameters["alphas"]

    print("name = %s" % (path))
    print("architecture = 784-" + "-".join([str(n)
                                            for n in hidden_sizes]) + "-10")
    print("number of epochs = %i" % (n_epochs))
    print("batch_size = %i" % (batch_size))
    print("n_it_neg = %i" % (n_it_neg))
    print("n_it_pos = %i" % (n_it_pos))
    print("epsilon = %.1f" % (epsilon))
    print("beta = %.1f" % (beta))
    print("learning rates: " + " ".join(
        ["alpha_W%i=%.3f" % (k + 1, alpha)
         for k, alpha in enumerate(alphas)]) + "\n")

    experiment = Experiment(project_name='eqprop')
    experiment.log_parameters({
        'original_implementation': True,
        'net_type': 1,
        'max_steps': n_it_neg,
        'use_predictors': False
    })

    n_batches_train = 50000 // batch_size
    n_batches_valid = 10000 // batch_size

    start_time = time.perf_counter()

    for epoch in range(n_epochs):

        ### TRAINING ###

        # CUMULATIVE SUM OF TRAINING ENERGY, TRAINING COST AND TRAINING ERROR
        measures_sum = [0., 0., 0.]
        gW = [0.] * len(alphas)

        for index in range(n_batches_train):

            # CHANGE THE INDEX OF THE MINI BATCH (= CLAMP X AND INITIALIZE THE HIDDEN AND OUTPUT LAYERS WITH THE PERSISTENT PARTICLES)
            net.change_mini_batch_index(index)

            # FREE PHASE
            net.free_phase(n_it_neg, epsilon)

            # MEASURE THE ENERGY, COST AND ERROR AT THE END OF THE FREE PHASE RELAXATION
            measures = net.measure()
            measures_sum = [
                measure_sum + measure
                for measure_sum, measure in zip(measures_sum, measures)
            ]
            measures_avg = [
                measure_sum / (index + 1) for measure_sum in measures_sum
            ]
            measures_avg[
                -1] *= 100.  # measures_avg[-1] corresponds to the error rate, which we want in percentage
            stdout.write("\repoch-%2i-train-%5i E=%.1f C=%.5f error=%.3f%%" %
                         (epoch, (index + 1) * batch_size, measures_avg[0],
                          measures_avg[1], measures_avg[2]))
            stdout.flush()

            _step = epoch * n_batches_train + index
            experiment.log_metric('energy', measures_avg[0], step=_step)
            experiment.log_metric('cost', measures_avg[1], step=_step)
            experiment.log_metric('accuracy',
                                  100 - measures_avg[2],
                                  step=_step)

            # WEAKLY CLAMPED PHASE
            sign = 2 * np.random.randint(0, 2) - 1  # random sign +1 or -1
            beta = np.float32(sign * beta)  # choose the sign of beta at random

            Delta_logW = net.weakly_clamped_phase(n_it_pos, epsilon, beta,
                                                  *alphas)
            gW = [
                gW1 + Delta_logW1 for gW1, Delta_logW1 in zip(gW, Delta_logW)
            ]

        stdout.write("\n")
        dlogW = [100. * gW1 / n_batches_train for gW1 in gW]
        print("   " + " ".join([
            "dlogW%i=%.3f%%" % (k + 1, dlogW1)
            for k, dlogW1 in enumerate(dlogW)
        ]))

        net.training_curves["training error"].append(measures_avg[-1])

        ### VALIDATION ###

        # CUMULATIVE SUM OF VALIDATION ENERGY, VALIDATION COST AND VALIDATION ERROR
        measures_sum = [0., 0., 0.]

        for index in range(n_batches_valid):

            # CHANGE THE INDEX OF THE MINI BATCH (= CLAMP X AND INITIALIZE THE HIDDEN AND OUTPUT LAYERS WITH THE PERSISTENT PARTICLES)
            net.change_mini_batch_index(n_batches_train + index)

            # FREE PHASE
            net.free_phase(n_it_neg, epsilon)

            # MEASURE THE ENERGY, COST AND ERROR AT THE END OF THE FREE PHASE RELAXATION
            measures = net.measure()
            measures_sum = [
                measure_sum + measure
                for measure_sum, measure in zip(measures_sum, measures)
            ]
            measures_avg = [
                measure_sum / (index + 1) for measure_sum in measures_sum
            ]
            measures_avg[
                -1] *= 100.  # measures_avg[-1] corresponds to the error rate, which we want in percentage
            stdout.write("\r   valid-%5i E=%.1f C=%.5f error=%.2f%%" %
                         ((index + 1) * batch_size, measures_avg[0],
                          measures_avg[1], measures_avg[2]))
            stdout.flush()

        stdout.write("\n")

        net.training_curves["validation error"].append(measures_avg[-1])

        duration = (time.perf_counter() - start_time) / 60.
        print(("   duration=%.1f min" % (duration)))

        # SAVE THE PARAMETERS OF THE NETWORK AT THE END OF THE EPOCH
        net.save_params()
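
The Experiment above is created without an explicit api_key. Assuming the usual comet_ml configuration mechanism, the key can instead be provided through the COMET_API_KEY environment variable (or a Comet config file) before the Experiment is constructed, e.g.:

import os
os.environ["COMET_API_KEY"] = "YOUR_API_KEY"  # placeholder

from comet_ml import Experiment
experiment = Experiment(project_name='eqprop')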
Example #15
best_train_acc = 0
best_test_acc = 0
for epoch in range(start_epoch, start_epoch + 2000):
    print(model_name)
    train_loss, train_acc = train(epoch)
    test_loss, test_acc = test(epoch)
    training_loss_list.append(train_loss)
    testing_loss_list.append(test_loss)
    training_acc_list.append(train_acc)
    testing_acc_list.append(test_acc)
    if (train_acc > best_train_acc):
        best_train_acc = train_acc
    if (test_acc > best_test_acc):
        best_test_acc = test_acc

    experiment.log_metric("best_train_acc", best_train_acc, epoch=epoch + 1)
    experiment.log_metric("best_test_acc", best_test_acc, epoch=epoch + 1)
    experiment.log_metric("train_acc", train_acc, epoch=epoch + 1)
    experiment.log_metric("test_acc", test_acc, epoch=epoch + 1)

    plt.plot(training_loss_list, color='blue', label='Training')
    plt.plot(testing_loss_list, color='red', label='Testing', alpha=.5)
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title('Loss plot')
    plt.legend()
    plt.savefig("./loss_plot_" + model_name + ".png", format='png')
    experiment.log_figure(figure=plt, figure_name='loss_plot', overwrite=True)
    plt.close()

    plt.plot(training_acc_list, color='blue', label='Training')
Example #16
            dis_optimizer.step()

            gen_optimizer.zero_grad()
            gen_loss.backward(retain_graph=True)
            gen_optimizer.step()

            print(
                "(Global Step {}) (Epoch {}) (Step {}) (Img Dis Loss {}) (Img Gen Loss {}) (Seq Dis Loss {}) (Seq Gen Loss {}) (Dis Loss {}) (Gen Loss {})"
                .format(global_step, epoch, i, img_dis_loss.item(),
                        img_gen_loss.item(), seq_dis_loss.item(),
                        seq_gen_loss.item(), dis_loss.item(), gen_loss.item()),
                end='\r',
                flush=True)

            experiment.log_metric("img_dis_loss",
                                  img_dis_loss.item(),
                                  step=global_step)
            experiment.log_metric("img_gen_loss",
                                  img_gen_loss.item(),
                                  step=global_step)
            experiment.log_metric("seq_dis_loss",
                                  seq_dis_loss.item(),
                                  step=global_step)
            experiment.log_metric("seq_gen_loss",
                                  seq_gen_loss.item(),
                                  step=global_step)
            experiment.log_metric("dis_loss",
                                  dis_loss.item(),
                                  step=global_step)
            experiment.log_metric("gen_loss",
                                  gen_loss.item(),
Example #17
        )

        with tf.Session(config=config) as sess:
            model = RPN3D(
                cls=cfg.DETECT_OBJ,
                single_batch_size=args.single_batch_size,
                avail_gpus=cfg.GPU_AVAILABLE.split(',')
            )
            if tf.train.get_checkpoint_state(save_model_dir):
                print("Reading model parameters from %s" % save_model_dir)
                model.saver.restore(
                    sess, tf.train.latest_checkpoint(save_model_dir))
            counter = 0
            with experiment.test():
                for batch in iterate_data(val_dir, shuffle=False, aug=False, is_testset=False, batch_size=args.single_batch_size * cfg.GPU_USE_COUNT, multi_gpu_sum=cfg.GPU_USE_COUNT):
                    experiment.log_metric("counter",counter)

                    if args.vis:
                        tags, results, front_images, bird_views, heatmaps = model.predict_step(sess, batch, summary=False, vis=True)
                    else:
                        tags, results = model.predict_step(sess, batch, summary=False, vis=False)

                    # ret: A, B
                    # A: (N) tag
                    # B: (N, N') (class, x, y, z, h, w, l, rz, score)
                    for tag, result in zip(tags, results):
                        of_path = os.path.join(args.output_path, 'data', tag + '.txt')
                        with open(of_path, 'w+') as f:
                            labels = box3d_to_label([result[:, 1:8]], [result[:, 0]], [result[:, -1]], coordinate='lidar')[0]
                            for line in labels:
                                f.write(line)
Example #18
                        step, mean_IU)
                    filepath = os.path.join(args.snapshot_dir, filename)
                    save_checkpoint(model.student,
                                    filepath,
                                    optimizer=None,
                                    meta=None)

            if step % 10000 == 0 or step in [100, 200, 300, 1000]:
                filename = 'CS_scenes_step-{:d}_mIU-{:.4f}.pth'.format(
                    step, mean_IU)
                filepath = os.path.join(args.snapshot_dir, filename)
                torch.save(model.student.state_dict(), filepath)
                filename = 'mmseg_step-{:d}_mIU-{:.4f}.pth'.format(
                    step, mean_IU)
                filepath = os.path.join(args.snapshot_dir, filename)
                save_checkpoint(model.student,
                                filepath,
                                optimizer=None,
                                meta=None)

                # checkpoint = {'state_dict': weights_to_cpu(get_state_dict(model.student))}
                # torch.save(checkpoint, filename)

            if args.api_key:
                experiment.log_metric('mean_IU', mean_IU, step=step)
                for i in range(len(trainset.class_name)):
                    experiment.log_metric(trainset.class_name[i],
                                          IU_array[i],
                                          step=step)
val_log.close()
Example #19
                'batch_repulsive': br,
                'bandwidth_repulsive': bandwidth_repulsive,
                'lambda_repulsive': args.lambda_repulsive
            }
        else:
            kwargs = {}

        data, target = data.cpu(), target.cpu()
        info_batch = optimize(net,
                              optimizer,
                              batch=(data, target),
                              add_repulsive_constraint=args.repulsive
                              is not None,
                              **kwargs)
        step += 1
        for k, v in info_batch.items():
            experiment.log_metric('train_{}'.format(k), v, step=step)

# Save the model
if not Path.exists(savepath / 'models'):
    os.makedirs(savepath / 'models')

model_path = savepath / 'models' / '{}_{}epochs.pt'.format(
    model_name, epoch + 1)
if not Path.exists(model_path):
    torch.save(net.state_dict(), model_path)
else:
    raise ValueError(
        'Error trying to save file at location {}: File already exists'.format(
            model_path))
Example #20
def train(normal_digit, anomalies, folder, file, p_train, p_test):

    # Create an experiment
    experiment = Experiment(project_name="deep-stats-thesis",
                            workspace="stecaron",
                            disabled=True)
    experiment.add_tag("mnist_kpca")

    # General parameters
    DOWNLOAD_MNIST = True
    PATH_DATA = os.path.join(os.path.expanduser("~"), 'Downloads/mnist')

    # Define training parameters
    hyper_params = {
        "TRAIN_SIZE": 2000,
        "TRAIN_NOISE": p_train,
        "TEST_SIZE": 800,
        "TEST_NOISE": p_test,
        # on which class we want to learn outliers
        "CLASS_SELECTED": [normal_digit],
        # which class we want to corrupt our dataset with
        "CLASS_CORRUPTED": anomalies,
        "INPUT_DIM": 28 * 28,  # In the case of MNIST
        "ALPHA": p_test,  # level of significance for the test
        # hyperparameters gamma in rbf kPCA
        "GAMMA": [1],
        "N_COMP": [30]
    }

    # Log experiment parameters
    experiment.log_parameters(hyper_params)

    # Load data
    train_data, test_data = load_mnist(PATH_DATA, download=DOWNLOAD_MNIST)

    # Normalize data
    train_data.data = train_data.data / 255.
    test_data.data = test_data.data / 255.

    # Build "train" and "test" datasets
    id_maj_train = numpy.random.choice(numpy.where(
        numpy.isin(train_data.train_labels,
                   hyper_params["CLASS_SELECTED"]))[0],
                                       int((1 - hyper_params["TRAIN_NOISE"]) *
                                           hyper_params["TRAIN_SIZE"]),
                                       replace=False)
    id_min_train = numpy.random.choice(numpy.where(
        numpy.isin(train_data.train_labels,
                   hyper_params["CLASS_CORRUPTED"]))[0],
                                       int(hyper_params["TRAIN_NOISE"] *
                                           hyper_params["TRAIN_SIZE"]),
                                       replace=False)
    id_train = numpy.concatenate((id_maj_train, id_min_train))

    id_maj_test = numpy.random.choice(numpy.where(
        numpy.isin(test_data.test_labels, hyper_params["CLASS_SELECTED"]))[0],
                                      int((1 - hyper_params["TEST_NOISE"]) *
                                          hyper_params["TEST_SIZE"]),
                                      replace=False)
    id_min_test = numpy.random.choice(numpy.where(
        numpy.isin(test_data.test_labels, hyper_params["CLASS_CORRUPTED"]))[0],
                                      int(hyper_params["TEST_NOISE"] *
                                          hyper_params["TEST_SIZE"]),
                                      replace=False)
    id_test = numpy.concatenate((id_min_test, id_maj_test))

    train_data.data = train_data.data[id_train]
    train_data.targets = train_data.targets[id_train]

    test_data.data = test_data.data[id_test]
    test_data.targets = test_data.targets[id_test]

    train_data.targets = numpy.isin(train_data.train_labels,
                                    hyper_params["CLASS_CORRUPTED"])
    test_data.targets = numpy.isin(test_data.test_labels,
                                   hyper_params["CLASS_CORRUPTED"])

    # Flatten the data and transform to numpy array
    train_data.data = train_data.data.view(-1, 28 * 28).numpy()
    test_data.data = test_data.data.view(-1, 28 * 28).numpy()

    # Train kPCA
    # param_grid = [{"gamma": hyper_params["GAMMA"],
    #                "n_components": hyper_params["N_COMP"]}]

    param_grid = [{"n_components": hyper_params["N_COMP"]}]

    # kpca = KernelPCA(fit_inverse_transform=True,
    #                  kernel="rbf",
    #                  remove_zero_eig=True,
    #                  n_jobs=-1)

    kpca = PCA()

    #my_scorer2 = make_scorer(my_scorer, greater_is_better=True)
    # grid_search = GridSearchCV(kpca, param_grid, cv=ShuffleSplit(
    #     n_splits=3), scoring=my_scorer)
    kpca.fit(train_data.data)
    X_kpca = kpca.transform(train_data.data)
    X_train_back = kpca.inverse_transform(X_kpca)
    X_test_back = kpca.inverse_transform(kpca.transform(test_data.data))

    # Compute the distance between original data and reconstruction
    dist_train = numpy.linalg.norm(train_data.data - X_train_back,
                                   ord=2,
                                   axis=1)
    dist_test = numpy.linalg.norm(test_data.data - X_test_back, ord=2, axis=1)

    # Test performances on train
    train_anomalies_ind = numpy.argsort(dist_train)[int(
        (1 - hyper_params["ALPHA"]) *
        hyper_params["TRAIN_SIZE"]):int(hyper_params["TRAIN_SIZE"])]
    train_predictions = numpy.zeros(hyper_params["TRAIN_SIZE"])
    train_predictions[train_anomalies_ind] = 1

    train_recall = metrics.recall_score(train_data.targets, train_predictions)
    train_precision = metrics.precision_score(train_data.targets,
                                              train_predictions)
    train_f1_score = metrics.f1_score(train_data.targets, train_predictions)
    train_auc = metrics.roc_auc_score(train_data.targets, train_predictions)

    print(f"Train Precision: {train_precision}")
    print(f"Train Recall: {train_recall}")
    print(f"Train F1 Score: {train_f1_score}")
    print(f"Train AUC: {train_auc}")
    experiment.log_metric("train_precision", train_precision)
    experiment.log_metric("train_recall", train_recall)
    experiment.log_metric("train_f1_score", train_f1_score)
    experiment.log_metric("train_auc", train_auc)

    # Test performances on test
    test_probs = numpy.array(
        [numpy.sum(xi >= dist_train) / len(dist_train) for xi in dist_test],
        dtype=float)
    test_anomalies_ind = numpy.argwhere(
        test_probs >= 1 - hyper_params["ALPHA"])
    test_predictions = numpy.zeros(hyper_params["TEST_SIZE"])
    test_predictions[test_anomalies_ind] = 1

    test_recall = metrics.recall_score(test_data.targets, test_predictions)
    test_precision = metrics.precision_score(test_data.targets,
                                             test_predictions)
    test_f1_score = metrics.f1_score(test_data.targets, test_predictions)
    test_auc = metrics.roc_auc_score(test_data.targets, test_probs)
    test_average_precision = metrics.average_precision_score(
        test_data.targets, test_predictions)

    print(f"Test Precision: {test_precision}")
    print(f"Test Recall: {test_recall}")
    print(f"Test F1 Score: {test_f1_score}")
    print(f"Test AUC: {test_auc}")
    print(f"Test average Precision: {test_average_precision}")
    experiment.log_metric("test_precision", test_precision)
    experiment.log_metric("test_recall", test_recall)
    experiment.log_metric("test_f1_score", test_f1_score)
    experiment.log_metric("test_auc", test_auc)
    experiment.log_metric("test_average_precision", test_average_precision)

    # Save the results in the output file
    col_names = [
        "timestamp", "precision", "recall", "f1_score", "average_precision",
        "auc"
    ]
    results_file = os.path.join(folder, "results_" + file + ".csv")
    if os.path.exists(results_file):
        df_results = pandas.read_csv(results_file, names=col_names, header=0)
    else:
        df_results = pandas.DataFrame(columns=col_names)

    df_results = df_results.append(pandas.DataFrame(numpy.concatenate(
        (numpy.array(
            datetime.datetime.fromtimestamp(
                time.time()).strftime('%Y-%m-%d %H:%M:%S')).reshape(1),
         test_precision.reshape(1), test_recall.reshape(1),
         test_f1_score.reshape(1), test_average_precision.reshape(1),
         test_auc.reshape(1))).reshape(1, -1),
                                                    columns=col_names),
                                   ignore_index=True)

    df_results.to_csv(results_file)
Example #21
class Logger:
    def __init__(self, send_logs, tags, parameters, experiment=None):
        self.stations = 5
        self.send_logs = send_logs
        if self.send_logs:
            if experiment is None:
                json_loc = glob.glob("./**/comet_token.json")[0]
                with open(json_loc, "r") as f:
                    kwargs = json.load(f)

                self.experiment = Experiment(**kwargs)
            else:
                self.experiment = experiment
        self.sent_mb = 0
        self.speed_window = deque(maxlen=100)
        self.step_time = None
        self.current_speed = 0
        if self.send_logs:
            if tags is not None:
                self.experiment.add_tags(tags)
            if parameters is not None:
                self.experiment.log_parameters(parameters)

    def begin_logging(self, episode_count, steps_per_ep, sigma, theta, step_time):
        self.step_time = step_time
        if self.send_logs:
            self.experiment.log_parameter("Episode count", episode_count)
            self.experiment.log_parameter("Steps per episode", steps_per_ep)
            self.experiment.log_parameter("theta", theta)
            self.experiment.log_parameter("sigma", sigma)

    def log_round(self, states, reward, cumulative_reward, info, loss, observations, step):
        self.experiment.log_histogram_3d(states, name="Observations", step=step)
        info = [[j for j in i.split("|")] for i in info]
        info = np.mean(np.array(info, dtype=np.float32), axis=0)
        try:
            # round_mb = np.mean([float(i.split("|")[0]) for i in info])
            round_mb = info[0]
        except Exception as e:
            print(info)
            print(reward)
            raise e
        self.speed_window.append(round_mb)
        self.current_speed = np.mean(np.asarray(self.speed_window)/self.step_time)
        self.sent_mb += round_mb
        # CW = np.mean([float(i.split("|")[1]) for i in info])
        CW = info[1]
        # stations = np.mean([float(i.split("|")[2]) for i in info])
        self.stations = info[2]
        fairness = info[3]

        if self.send_logs:
            self.experiment.log_metric("Round reward", np.mean(reward), step=step)
            self.experiment.log_metric("Per-ep reward", np.mean(cumulative_reward), step=step)
            self.experiment.log_metric("Megabytes sent", self.sent_mb, step=step)
            self.experiment.log_metric("Round megabytes sent", round_mb, step=step)
            self.experiment.log_metric("Chosen CW", CW, step=step)
            self.experiment.log_metric("Station count", self.stations, step=step)
            self.experiment.log_metric("Current throughput", self.current_speed, step=step)
            self.experiment.log_metric("Fairness index", fairness, step=step)

            for i, obs in enumerate(observations):
                self.experiment.log_metric(f"Observation {i}", obs, step=step)

            self.experiment.log_metrics(loss, step=step)

    def log_episode(self, cumulative_reward, speed, step):
        if self.send_logs:
            self.experiment.log_metric("Cumulative reward", cumulative_reward, step=step)
            self.experiment.log_metric("Speed", speed, step=step)

        self.sent_mb = 0
        self.last_speed = speed
        self.speed_window = deque(maxlen=100)
        self.current_speed = 0

    def end(self):
        if self.send_logs:
            self.experiment.end()
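
An illustrative way to drive the Logger defined above; all values are placeholders, and send_logs=False keeps the sketch local (no Comet experiment is created):

logger = Logger(send_logs=False, tags=None, parameters=None)
logger.begin_logging(episode_count=10, steps_per_ep=100,
                     sigma=0.5, theta=0.15, step_time=0.01)
logger.log_episode(cumulative_reward=12.3, speed=4.5, step=100)
logger.end()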
Example #22
                    running_loss += loss.item()
                    total_batches = total_batches + 1.0

                    # running loss intervals
                    div = 125
                    if i == 0:
                        div = 1
                    if i % div == 0 and i != 0:  # print every 1024 mini-batches

                        print(
                            '[%d, %d] E[loss]: %.20f loss: %.20f acc 1: %.20f acc 5: %.20f]'
                            % (epoch, i, float(expected_loss) /
                               (float(i + 1)), float(running_loss) /
                               float(div), float(acc_1) / float(total_batches),
                               float(acc_5) / float(total_batches)))
                        experiment.log_metric('Epoch', epoch)
                        experiment.log_metric('Training_iteration', i)
                        experiment.log_metric(
                            'E[loss]',
                            float(expected_loss) / float(i + 1))
                        experiment.log_metric('Running_loss',
                                              float(running_loss) / float(div))
                        experiment.log_metric(
                            'Acc@1',
                            float(acc_1) / float(total_batches))
                        experiment.log_metric(
                            'Acc@5',
                            float(acc_5) / float(total_batches))
                        experiment.log_metric(
                            'Acc@10',
                            float(acc_10) / float(total_batches))
Example #23
class CorefSolver():
    def __init__(self, args):
        self.args = args
        self.data_utils = data_utils(args)
        self.disable_comet = args.disable_comet
        self.model = self.make_model(
            src_vocab=self.data_utils.vocab_size,
            tgt_vocab=self.data_utils.vocab_size,
            N=args.num_layer,
            dropout=args.dropout,
            entity_encoder_type=args.entity_encoder_type)
        print(self.model)
        if self.args.train:
            self.outfile = open(self.args.logfile, 'w')
            self.model_dir = make_save_dir(args.model_dir)
            # self.logfile = os.path.join(args.logdir, args.exp_name)
            # self.log = SummaryWriter(self.logfile)
            self.w_valid_file = args.w_valid_file

    def make_model(self,
                   src_vocab,
                   tgt_vocab,
                   N=6,
                   dropout=0.1,
                   d_model=512,
                   entity_encoder_type='linear',
                   d_ff=2048,
                   h=8):

        "Helper: Construct a model from hyperparameters."
        c = copy.deepcopy
        attn = MultiHeadedAttention(h, d_model)
        attn_ner = MultiHeadedAttention(1, d_model, dropout)
        ff = PositionwiseFeedForward(d_model, d_ff, dropout)
        position = PositionalEncoding(d_model, dropout)
        embed = Embeddings(d_model, src_vocab)
        word_embed = nn.Sequential(embed, c(position))
        print('pgen', self.args.pointer_gen)

        if entity_encoder_type == 'transformer':
            # entity_encoder = nn.Sequential(embed, Encoder(EncoderLayer(d_model, c(attn), c(ff), dropout), 1))
            print('transformer')
            entity_encoder = Seq_Entity_Encoder(
                embed,
                Encoder(EncoderLayer(d_model, c(attn), c(ff), dropout), 2))
        elif entity_encoder_type == 'albert':
            albert_tokenizer = AlbertTokenizer.from_pretrained(
                'albert-base-v2')
            albert = AlbertModel.from_pretrained('albert-base-v2')
            entity_encoder = Albert_Encoder(albert, albert_tokenizer, d_model)
        elif entity_encoder_type == 'gru':
            entity_encoder = RNNEncoder(embed,
                                        'GRU',
                                        d_model,
                                        d_model,
                                        num_layers=1,
                                        dropout=0.1,
                                        bidirectional=True)
            print('gru')
        elif entity_encoder_type == 'lstm':
            entity_encoder = RNNEncoder(embed,
                                        'LSTM',
                                        d_model,
                                        d_model,
                                        num_layers=1,
                                        dropout=0.1,
                                        bidirectional=True)
            print('lstm')

        if self.args.ner_at_embedding:
            model = EncoderDecoderOrg(
                Encoder(EncoderLayer(d_model, c(attn), c(ff), dropout), N),
                DecoderOrg(
                    DecoderLayerOrg(d_model, c(attn), c(attn), c(ff), dropout),
                    N, d_model, tgt_vocab, self.args.pointer_gen), word_embed,
                word_embed, entity_encoder)
        else:
            if self.args.ner_last:
                decoder = Decoder(
                    DecoderLayer(d_model, c(attn), c(attn), c(ff),
                                 dropout), N, d_model, tgt_vocab,
                    self.args.pointer_gen, self.args.ner_last)
            else:
                decoder = Decoder(
                    DecoderLayer_ner(d_model, c(attn), c(attn), attn_ner,
                                     c(ff), dropout, self.args.fusion), N,
                    d_model, tgt_vocab, self.args.pointer_gen,
                    self.args.ner_last)
            model = EncoderDecoder(
                Encoder(EncoderLayer(d_model, c(attn), c(ff), dropout), N),
                decoder, word_embed, word_embed, entity_encoder)

        # Initialize parameters with Glorot / fan_avg, as in the reference Transformer implementation.
        for p in model.parameters():
            if p.dim() > 1:
                nn.init.xavier_uniform_(p)

        # levels = 3
        # num_chans = [d_model] * (args.levels)
        # k_size = 5
        # tcn = TCN(embed, d_model, num_channels, k_size, dropout=dropout)

        return model.cuda()

    def train(self):
        if not self.disable_comet:
            # logging
            hyper_params = {
                "num_layer": self.args.num_layer,
                "pointer_gen": self.args.pointer_gen,
                "ner_last": self.args.ner_last,
                "entity_encoder_type": self.args.entity_encoder_type,
                "fusion": self.args.fusion,
                "dropout": self.args.dropout,
            }
            COMET_PROJECT_NAME = 'summarization'
            COMET_WORKSPACE = 'timchen0618'

            self.exp = Experiment(
                api_key='mVpNOXSjW7eU0tENyeYiWZKsl',
                project_name=COMET_PROJECT_NAME,
                workspace=COMET_WORKSPACE,
                auto_output_logging='simple',
                auto_metric_logging=None,
                display_summary=False,
            )
            self.exp.log_parameters(hyper_params)
            self.exp.add_tags([
                '%s entity_encoder' % self.args.entity_encoder_type,
                self.args.fusion
            ])
            if self.args.ner_last:
                self.exp.add_tag('ner_last')
            if self.args.ner_at_embedding:
                self.exp.add_tag('ner_at_embedding')
            self.exp.set_name(self.args.exp_name)
            self.exp.add_tag('coreference')

        print('ner_last ', self.args.ner_last)
        print('ner_at_embedding', self.args.ner_at_embedding)
        # dataloader & optimizer
        data_yielder = self.data_utils.data_yielder(num_epoch=100)
        optim = torch.optim.Adam(self.model.parameters(),
                                 lr=1e-7,
                                 betas=(0.9, 0.998),
                                 eps=1e-8,
                                 amsgrad=True)  #get_std_opt(self.model)
        # entity_optim = torch.optim.Adam(self.entity_encoder.parameters(), lr=1e-7, betas=(0.9, 0.998), eps=1e-8, amsgrad=True)
        total_loss = []
        start = time.time()
        print('*' * 50)
        print('Start Training...')
        print('*' * 50)
        start_step = 0

        # if loading from checkpoint
        if self.args.load_model:
            state_dict = torch.load(self.args.load_model)['state_dict']
            self.model.load_state_dict(state_dict)
            print("Loading model from " + self.args.load_model + "...")
            # encoder_state_dict = torch.load(self.args.entity_encoder)['state_dict']
            # self.entity_encoder.load_state_dict(encoder_state_dict)
            # print("Loading entity_encoder from %s" + self.args.entity_encoder + "...")
            start_step = int(torch.load(self.args.load_model)['step'])
            print('Resume training from step %d ...' % start_step)

        warmup_steps = 10000
        d_model = 512
        lr = 1e-7
        for step in range(start_step, self.args.total_steps):
            self.model.train()
            batch = data_yielder.__next__()
            optim.zero_grad()
            # entity_optim.zero_grad()

            #update lr
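            # Noam-style schedule: linear warmup up to warmup_steps, then inverse square-root decay,
            # with the learning rate refreshed every 400 steps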
            if step % 400 == 1:
                lr = (1 / (d_model**0.5)) * min(
                    (1 / (step / 4)**0.5), step * (1 / (warmup_steps**1.5)))
                for param_group in optim.param_groups:
                    param_group['lr'] = lr
                # for param_group in entity_optim.param_groups:
                #     param_group['lr'] = lr

            batch['src'] = batch['src'].long()
            batch['tgt'] = batch['tgt'].long()
            batch['ner'] = batch['ner'].long()
            batch['src_extended'] = batch['src_extended'].long()

            # forward the model
            if self.args.entity_encoder_type == 'albert':
                d = self.model.entity_encoder.tokenizer.batch_encode_plus(
                    batch['ner_text'],
                    return_attention_masks=True,
                    max_length=10,
                    add_special_tokens=False,
                    pad_to_max_length=True,
                    return_tensors='pt')
                ner_mask = d['attention_mask'].cuda().unsqueeze(1)
                ner = d['input_ids'].cuda()
                # print('ner', ner.size())
                # print('ner_mask', ner_mask.size())
                # print('src_mask', batch['src_mask'].size())

            if self.args.entity_encoder_type == 'gru' or self.args.entity_encoder_type == 'lstm':
                ner_feat = self.model.entity_encoder(
                    batch['ner'].transpose(0, 1), batch['cluster_len'])[1]
            elif self.args.entity_encoder_type == 'transformer':
                mask = gen_mask(batch['cluster_len'])
                ner_feat = self.model.entity_encoder(batch['ner'], mask)
            ner, ner_mask = self.data_utils.pad_ner_feature(
                ner_feat.squeeze(), batch['num_clusters'],
                batch['src'].size(0))
            # print('ner', ner.size())
            # print('ner_mask', ner_mask.size())

            if self.args.ner_at_embedding:
                out = self.model.forward(batch['src'], batch['tgt'], ner,
                                         batch['src_mask'], batch['tgt_mask'],
                                         batch['src_extended'],
                                         len(batch['oov_list']))
            else:
                out = self.model.forward(batch['src'], batch['tgt'], ner,
                                         batch['src_mask'], batch['tgt_mask'],
                                         batch['src_extended'],
                                         len(batch['oov_list']), ner_mask)
            # print out info
            pred = out.topk(1, dim=-1)[1].squeeze().detach().cpu().numpy()[0]
            gg = batch['src_extended'].long().detach().cpu().numpy()[0][:100]
            tt = batch['tgt'].long().detach().cpu().numpy()[0]
            yy = batch['y'].long().detach().cpu().numpy()[0]

            #compute loss & update
            loss = self.model.loss_compute(out, batch['y'].long())
            loss.backward()
            optim.step()
            # entity_optim.step()

            total_loss.append(loss.detach().cpu().numpy())

            # logging information
            if step % self.args.print_every_steps == 1:
                elapsed = time.time() - start
                print("Epoch Step: %d Loss: %f Time: %f lr: %6.6f" %
                      (step, np.mean(total_loss), elapsed,
                       optim.param_groups[0]['lr']))
                self.outfile.write("Epoch Step: %d Loss: %f Time: %f\n" %
                                   (step, np.mean(total_loss), elapsed))
                print(
                    'src:\n',
                    self.data_utils.id2sent(gg, False, False,
                                            batch['oov_list']))
                print(
                    'tgt:\n',
                    self.data_utils.id2sent(yy, False, False,
                                            batch['oov_list']))
                print(
                    'pred:\n',
                    self.data_utils.id2sent(pred, False, False,
                                            batch['oov_list']))
                print('oov_list:\n', batch['oov_list'])

                if ner_mask is not None and not self.args.ner_at_embedding:
                    pp = self.model.greedy_decode(
                        batch['src_extended'].long()[:1], ner[:1],
                        batch['src_mask'][:1], 100, self.data_utils.bos,
                        len(batch['oov_list']), self.data_utils.vocab_size,
                        True, ner_mask[:1])
                else:
                    pp = self.model.greedy_decode(
                        batch['src_extended'].long()[:1], ner[:1],
                        batch['src_mask'][:1], 100, self.data_utils.bos,
                        len(batch['oov_list']), self.data_utils.vocab_size,
                        True)

                pp = pp.detach().cpu().numpy()
                print(
                    'pred_greedy:\n',
                    self.data_utils.id2sent(pp[0], False, False,
                                            batch['oov_list']))

                print()
                start = time.time()
                if not self.disable_comet:
                    # self.log.add_scalar('Loss/train', np.mean(total_loss), step)
                    self.exp.log_metric('Train Loss',
                                        np.mean(total_loss),
                                        step=step)
                    self.exp.log_metric('Learning Rate',
                                        optim.param_groups[0]['lr'],
                                        step=step)

                    self.exp.log_text('Src: ' + self.data_utils.id2sent(
                        gg, False, False, batch['oov_list']))
                    self.exp.log_text('Tgt:' + self.data_utils.id2sent(
                        yy, False, False, batch['oov_list']))
                    self.exp.log_text('Pred:' + self.data_utils.id2sent(
                        pred, False, False, batch['oov_list']))
                    self.exp.log_text('Pred Greedy:' + self.data_utils.id2sent(
                        pp[0], False, False, batch['oov_list']))
                    self.exp.log_text('OOV:' + ' '.join(batch['oov_list']))

                total_loss = []

            ##########################
            # validation
            ##########################
            if step % self.args.valid_every_steps == 2:
                print('*' * 50)
                print('Start Validation...')
                print('*' * 50)
                self.model.eval()
                val_yielder = self.data_utils.data_yielder(1, valid=True)
                total_loss = []
                fw = open(self.w_valid_file, 'w')
                for batch in val_yielder:
                    with torch.no_grad():
                        batch['src'] = batch['src'].long()
                        batch['tgt'] = batch['tgt'].long()
                        batch['ner'] = batch['ner'].long()
                        batch['src_extended'] = batch['src_extended'].long()

                        ### ner ######
                        if self.args.entity_encoder_type == 'albert':
                            d = self.model.entity_encoder.tokenizer.batch_encode_plus(
                                batch['ner_text'],
                                return_attention_masks=True,
                                max_length=10,
                                add_special_tokens=False,
                                pad_to_max_length=True,
                                return_tensors='pt')
                            ner_mask = d['attention_mask'].cuda().unsqueeze(1)
                            ner = d['input_ids'].cuda()

                        if self.args.entity_encoder_type == 'gru' or self.args.entity_encoder_type == 'lstm':
                            ner_feat = self.model.entity_encoder(
                                batch['ner'].transpose(0, 1),
                                batch['cluster_len'])[1]
                        elif self.args.entity_encoder_type == 'transformer':
                            mask = gen_mask(batch['cluster_len'])
                            ner_feat = self.model.entity_encoder(
                                batch['ner'], mask)
                        ner, ner_mask = self.data_utils.pad_ner_feature(
                            ner_feat.squeeze(), batch['num_clusters'],
                            batch['src'].size(0))
                        ### ner ######

                        if self.args.ner_at_embedding:
                            out = self.model.forward(batch['src'],
                                                     batch['tgt'], ner,
                                                     batch['src_mask'],
                                                     batch['tgt_mask'],
                                                     batch['src_extended'],
                                                     len(batch['oov_list']))
                        else:
                            out = self.model.forward(batch['src'],
                                                     batch['tgt'], ner,
                                                     batch['src_mask'],
                                                     batch['tgt_mask'],
                                                     batch['src_extended'],
                                                     len(batch['oov_list']),
                                                     ner_mask)
                        loss = self.model.loss_compute(out, batch['y'].long())
                        total_loss.append(loss.item())

                        if self.args.ner_at_embedding:
                            pred = self.model.greedy_decode(
                                batch['src_extended'].long(), ner,
                                batch['src_mask'], self.args.max_len,
                                self.data_utils.bos, len(batch['oov_list']),
                                self.data_utils.vocab_size)
                        else:
                            pred = self.model.greedy_decode(
                                batch['src_extended'].long(),
                                ner,
                                batch['src_mask'],
                                self.args.max_len,
                                self.data_utils.bos,
                                len(batch['oov_list']),
                                self.data_utils.vocab_size,
                                ner_mask=ner_mask)

                        for l in pred:
                            sentence = self.data_utils.id2sent(
                                l[1:], True, self.args.beam_size != 1,
                                batch['oov_list'])
                            fw.write(sentence)
                            fw.write("\n")
                fw.close()
                # files_rouge = FilesRouge()
                # scores = files_rouge.get_scores(self.w_valid_file, self.args.valid_tgt_file, avg=True)
                scores = cal_rouge_score(self.w_valid_file,
                                         self.args.valid_ref_file)
                r1_score = scores['rouge1']
                r2_score = scores['rouge2']

                print('=============================================')
                print('Validation Result -> Loss : %6.6f' %
                      (sum(total_loss) / len(total_loss)))
                print(scores)
                print('=============================================')
                self.outfile.write(
                    '=============================================\n')
                self.outfile.write('Validation Result -> Loss : %6.6f\n' %
                                   (sum(total_loss) / len(total_loss)))
                self.outfile.write(
                    '=============================================\n')
                # self.model.train()
                # self.log.add_scalar('Loss/valid', sum(total_loss)/len(total_loss), step)
                # self.log.add_scalar('Score/valid', r1_score, step)
                if not self.disable_comet:
                    self.exp.log_metric('Valid Loss',
                                        sum(total_loss) / len(total_loss),
                                        step=step)
                    self.exp.log_metric('R1 Score', r1_score, step=step)
                    self.exp.log_metric('R2 Score', r2_score, step=step)

                #Saving Checkpoint
                w_step = int(step / 10000)
                print('Saving ' + str(w_step) + 'w_model.pth!\n')
                self.outfile.write('Saving ' + str(w_step) + 'w_model.pth\n')

                model_name = str(w_step) + 'w_' + '%6.6f' % (
                    sum(total_loss) / len(total_loss)
                ) + '%2.3f_' % r1_score + '%2.3f_' % r2_score + 'model.pth'
                state = {'step': step, 'state_dict': self.model.state_dict()}
                torch.save(state, os.path.join(self.model_dir, model_name))

                # entity_encoder_name = str(w_step) + '0w_' + '%6.6f'%(sum(total_loss)/len(total_loss)) + '%2.3f_'%r1_score + 'entity_encoder.pth'
                # state = {'step': step, 'state_dict': self.entity_encoder.state_dict()}
                # torch.save(state, os.path.join(self.model_dir, entity_encoder_name))

    def test(self):
        #prepare model
        path = self.args.load_model
        # entity_encoder_path = self.args.entity_encoder
        state_dict = torch.load(path)['state_dict']
        max_len = self.args.max_len
        model = self.model
        model.load_state_dict(state_dict)

        # entity_encoder_dict = torch.load(entity_encoder_path)['state_dict']
        # self.entity_encoder.load_state_dict(entity_encoder_dict)

        pred_dir = make_save_dir(self.args.pred_dir)
        filename = self.args.filename

        #start decoding
        data_yielder = self.data_utils.data_yielder(num_epoch=1)
        total_loss = []
        start = time.time()

        #file
        f = open(os.path.join(pred_dir, filename), 'w')

        self.model.eval()

        # decode_strategy = BeamSearch(
        #             self.beam_size,
        #             batch_size=batch.batch_size,
        #             pad=self._tgt_pad_idx,
        #             bos=self._tgt_bos_idx,
        #             eos=self._tgt_eos_idx,
        #             n_best=self.n_best,
        #             global_scorer=self.global_scorer,
        #             min_length=self.min_length, max_length=self.max_length,
        #             return_attention=attn_debug or self.replace_unk,
        #             block_ngram_repeat=self.block_ngram_repeat,
        #             exclusion_tokens=self._exclusion_idxs,
        #             stepwise_penalty=self.stepwise_penalty,
        #             ratio=self.ratio)

        step = 0
        for batch in data_yielder:
            #print(batch['src'].data.size())
            step += 1
            if step % 100 == 0:
                print('%d batch processed. Time elapsed: %f min.' %
                      (step, (time.time() - start) / 60.0))
                start = time.time()

            ### ner ###
            if self.args.entity_encoder_type == 'albert':
                d = self.model.entity_encoder.tokenizer.batch_encode_plus(
                    batch['ner_text'],
                    return_attention_masks=True,
                    max_length=10,
                    add_special_tokens=False,
                    pad_to_max_length=True,
                    return_tensors='pt')
                ner_mask = d['attention_mask'].cuda().unsqueeze(1)
                ner = d['input_ids'].cuda()
            else:
                ner_mask = None
                ner = batch['ner'].long()

            with torch.no_grad():
                if self.args.beam_size == 1:
                    if self.args.ner_at_embedding:
                        out = self.model.greedy_decode(
                            batch['src_extended'].long(),
                            self.model.entity_encoder(ner),
                            batch['src_mask'], max_len, self.data_utils.bos,
                            len(batch['oov_list']), self.data_utils.vocab_size)
                    else:
                        out = self.model.greedy_decode(
                            batch['src_extended'].long(),
                            self.model.entity_encoder(ner),
                            batch['src_mask'],
                            max_len,
                            self.data_utils.bos,
                            len(batch['oov_list']),
                            self.data_utils.vocab_size,
                            ner_mask=ner_mask)
                else:
                    ret = self.beam_decode(batch, max_len,
                                           len(batch['oov_list']))
                    out = ret['predictions']
            for l in out:
                sentence = self.data_utils.id2sent(l[1:], True,
                                                   self.args.beam_size != 1,
                                                   batch['oov_list'])
                #print(l[1:])
                f.write(sentence)
                f.write("\n")

    def beam_decode(self, batch, max_len, oov_nums):

        src = batch['src'].long()
        src_mask = batch['src_mask']
        src_extended = batch['src_extended'].long()

        bos_token = self.data_utils.bos
        beam_size = self.args.beam_size
        vocab_size = self.data_utils.vocab_size
        batch_size = src.size(0)

        def rvar(a):
            return a.repeat(beam_size, 1, 1)

        def rvar2(a):
            return a.repeat(beam_size, 1)

        def bottle(m):
            return m.view(batch_size * beam_size, -1)

        def unbottle(m):
            return m.view(beam_size, batch_size, -1)

        ### ner ###
        if self.args.entity_encoder_type == 'albert':
            d = self.model.entity_encoder.tokenizer.batch_encode_plus(
                batch['ner_text'],
                return_attention_masks=True,
                max_length=10,
                add_special_tokens=False,
                pad_to_max_length=True,
                return_tensors='pt')
            ner_mask = d['attention_mask'].cuda().unsqueeze(1)
            ner = d['input_ids'].cuda()
        else:
            ner_mask = None
            ner = batch['ner'].long()
        ner = self.model.entity_encoder(ner)

        if self.args.ner_at_embedding:
            memory = self.model.encode(src, src_mask, ner)
        else:
            memory = self.model.encode(src, src_mask)

        assert batch_size == 1

        beam = [
            Beam(beam_size,
                 self.data_utils.pad,
                 bos_token,
                 self.data_utils.eos,
                 min_length=self.args.min_length) for i in range(batch_size)
        ]
        memory = rvar(memory)
        ner = rvar(ner)
        src_mask = rvar(src_mask)
        src_extended = rvar2(src_extended)

        for i in range(self.args.max_len):
            if all((b.done() for b in beam)):
                break
            # Construct batch x beam_size nxt words.
            # Get all the pending current beam words and arrange for forward.
            inp = torch.stack([b.get_current_state()
                               for b in beam]).t().contiguous().view(-1, 1)
            #inp -> [1, 3]
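            # ids >= vocab_size come from the copy mechanism (OOV words); zero them out
            # so the embedding lookup stays within the fixed vocabulary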
            inp_mask = inp < self.data_utils.vocab_size
            inp = inp * inp_mask.long()

            decoder_input = inp

            if self.args.ner_at_embedding:
                final_dist = self.model.decode(memory, ner, src_mask,
                                               decoder_input, None,
                                               src_extended, oov_nums)
            else:
                final_dist = self.model.decode(memory,
                                               ner,
                                               src_mask,
                                               decoder_input,
                                               None,
                                               src_extended,
                                               oov_nums,
                                               ner_mask=ner_mask)
            # final_dist, decoder_hidden, attn_dist_p, p_gen = self.seq2seq_model.model_copy.decoder(
            #                 decoder_input, decoder_hidden,
            #                 post_encoder_outputs, post_enc_padding_mask,
            #                 extra_zeros, post_enc_batch_extend_vocab
            #                 )
            # # Run one step.

            # print('inp', inp.size())

            # decoder_outputs: beam x rnn_size

            # (b) Compute a vector of batch*beam word scores.
            out = unbottle(final_dist)
            out[:, :, 2] = 0  # suppress the <unk> token (id 2) so the beam never selects it
            # out.size -> [3, 1, vocab]

            # (c) Advance each beam.
            for j, b in enumerate(beam):
                b.advance(out[:, j])
                # decoder_hidden = self.beam_update(j, b.get_current_origin(), beam_size, decoder_hidden)

        # (4) Extract sentences from beam.
        ret = self._from_beam(beam)

        return ret

    def _from_beam(self, beam):
        ret = {"predictions": [], "scores": []}
        for b in beam:

            n_best = self.args.n_best
            scores, ks = b.sort_finished(minimum=n_best)
            hyps = []
            for i, (times, k) in enumerate(ks[:n_best]):
                hyp = b.get_hyp(times, k)
                hyps.append(hyp)

            ret["predictions"].append(hyps)
            ret["scores"].append(scores)

        return ret
Ejemplo n.º 24
0
def train(rank, defparams, hyper):

    params = {}
    for param in defparams.keys():
        params[param] = defparams[param]

    hyperp = {}
    for hp in hyper.keys():
        hyperp[hp] = hyper[hp]

    experiment = Experiment(api_key="keGmeIz4GfKlQZlOP6cit4QOi",
                            project_name="hadron-shower",
                            workspace="engineren")
    experiment.add_tag(params['exp'])

    experiment.log_parameters(hyperp)

    device = torch.device("cuda")
    torch.manual_seed(params["seed"])

    world_size = int(os.environ["SLURM_NNODES"])
    rank = int(os.environ["SLURM_PROCID"])

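    # One process per SLURM task joins the NCCL process group; rendezvous uses the
    # shared file given in params["DDP_init_file"]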
    dist.init_process_group(backend='nccl',
                            world_size=world_size,
                            rank=rank,
                            init_method=params["DDP_init_file"])

    aD = DCGAN_D(hyperp["ndf"]).to(device)
    aG = DCGAN_G(hyperp["ngf"], hyperp["z"]).to(device)
    aE = energyRegressor().to(device)
    aP = PostProcess_Size1Conv_EcondV2(48,
                                       13,
                                       3,
                                       128,
                                       bias=True,
                                       out_funct='none').to(device)

    optimizer_g = torch.optim.Adam(aG.parameters(),
                                   lr=hyperp["L_gen"],
                                   betas=(0.5, 0.9))
    optimizer_d = torch.optim.Adam(aD.parameters(),
                                   lr=hyperp["L_crit"],
                                   betas=(0.5, 0.9))
    optimizer_e = torch.optim.SGD(aE.parameters(), lr=hyperp["L_calib"])
    optimizer_p = torch.optim.Adam(aP.parameters(),
                                   lr=hyperp["L_post"],
                                   betas=(0.5, 0.9))

    assert torch.backends.cudnn.enabled, "NVIDIA/Apex:Amp requires cudnn backend to be enabled."
    torch.backends.cudnn.benchmark = True

    # Initialize Amp
    models, optimizers = amp.initialize([aG, aD], [optimizer_g, optimizer_d],
                                        opt_level="O1",
                                        num_losses=2)

    #aD = nn.DataParallel(aD)
    #aG = nn.DataParallel(aG)
    #aE = nn.DataParallel(aE)

    aG, aD = models
    optimizer_g, optimizer_d = optimizers

    aG = nn.parallel.DistributedDataParallel(aG, device_ids=[0])
    aD = nn.parallel.DistributedDataParallel(aD, device_ids=[0])
    aE = nn.parallel.DistributedDataParallel(aE, device_ids=[0])
    aP = nn.parallel.DistributedDataParallel(aP, device_ids=[0])

    experiment.set_model_graph(str(aG), overwrite=False)
    experiment.set_model_graph(str(aD), overwrite=False)

    if params["restore_pp"]:
        aP.load_state_dict(
            torch.load(params["restore_path_PP"] + params["post_saved"],
                       map_location=torch.device(device)))

    if params["restore"]:
        checkpoint = torch.load(params["restore_path"])
        aG.load_state_dict(checkpoint['Generator'])
        aD.load_state_dict(checkpoint['Critic'])
        optimizer_g.load_state_dict(checkpoint['G_optimizer'])
        optimizer_d.load_state_dict(checkpoint['D_optimizer'])
        itr = checkpoint['iteration']

    else:
        aG.apply(weights_init)
        aD.apply(weights_init)
        itr = 0

    if params["c0"]:
        aE.apply(weights_init)
    elif params["c1"]:
        aE.load_state_dict(
            torch.load(params["calib_saved"],
                       map_location=torch.device(device)))

    one = torch.tensor(1.0).to(device)
    mone = (one * -1).to(device)

    print('loading data...')
    paths_list = [
        '/beegfs/desy/user/eren/data_generator/pion/hcal_only/pion40part1.hdf5',
        '/beegfs/desy/user/eren/data_generator/pion/hcal_only/pion40part2.hdf5',
        '/beegfs/desy/user/eren/data_generator/pion/hcal_only/pion40part3.hdf5',
        '/beegfs/desy/user/eren/data_generator/pion/hcal_only/pion40part4.hdf5',
        '/beegfs/desy/user/eren/data_generator/pion/hcal_only/pion40part5.hdf5',
        '/beegfs/desy/user/eren/data_generator/pion/hcal_only/pion40part6.hdf5',
        '/beegfs/desy/user/eren/data_generator/pion/hcal_only/pion40part7.hdf5'
    ]

    train_data = PionsDataset(paths_list, core=True)

    train_sampler = torch.utils.data.distributed.DistributedSampler(
        train_data, num_replicas=world_size, rank=rank)

    dataloader = DataLoader(train_data,
                            batch_size=hyperp["batch_size"],
                            num_workers=0,
                            shuffle=False,
                            drop_last=True,
                            pin_memory=True,
                            sampler=train_sampler)

    print('done')

    #scheduler_g = optim.lr_scheduler.StepLR(optimizer_g, step_size=1, gamma=params["gamma_g"])
    #scheduler_d = optim.lr_scheduler.StepLR(optimizer_d, step_size=1, gamma=params["gamma_crit"])
    #scheduler_e = optim.lr_scheduler.StepLR(optimizer_e, step_size=1, gamma=params["gamma_calib"])

    #writer = SummaryWriter()

    e_criterion = nn.L1Loss()  # for energy regressor training

    dataiter = iter(dataloader)

    BATCH_SIZE = hyperp["batch_size"]
    LATENT = hyperp["z"]
    EXP = params["exp"]
    KAPPA = hyperp["kappa"]
    LAMBD = hyperp["lambda"]
    ## Post-Processing
    LDP = hyperp["LDP"]
    wMMD = hyperp["wMMD"]
    wMSE = hyperp["wMSE"]

    ## IO paths
    OUTP = params['output_path']

    for iteration in range(50000):

        iteration += itr + 1
        #---------------------TRAIN D------------------------
        for p in aD.parameters():  # reset requires_grad
            p.requires_grad_(True)  # they are set to False below in training G

        for e in aE.parameters():  # reset requires_grad (constrainer)
            e.requires_grad_(True)  # they are set to False below in training G

        for i in range(hyperp["ncrit"]):

            aD.zero_grad()
            aE.zero_grad()

            noise = np.random.uniform(-1, 1, (BATCH_SIZE, LATENT))
            noise = torch.from_numpy(noise).float()
            noise = noise.view(
                -1, LATENT, 1, 1,
                1)  #[BS, nz]  --> [Bs,nz,1,1,1] Needed for Generator
            noise = noise.to(device)

            batch = next(dataiter, None)

            if batch is None:
                dataiter = iter(dataloader)
                batch = next(dataiter)

            real_label = batch['energy']  ## energy label
            real_label = real_label.to(device)

            with torch.no_grad():
                noisev = noise  # totally freeze G, training D

            fake_data = aG(noisev, real_label).detach()

            real_data = batch['shower']  # 48x48x48 calo image
            real_data = real_data.to(device)
            real_data.requires_grad_(True)

            #### supervised-training for energy regressor!
            if params["train_calib"]:
                output = aE(real_data.float())
                e_loss = e_criterion(output, real_label.view(BATCH_SIZE, 1))
                e_loss.backward()
                optimizer_e.step()

            ######

            # train with real data

            disc_real = aD(real_data.float(), real_label.float())

            # train with fake data
            fake_data = fake_data.unsqueeze(
                1)  ## transform to [BS, 1, 48, 48, 48]
            disc_fake = aD(fake_data, real_label.float())

            # train with interpolated data
            gradient_penalty = calc_gradient_penalty(aD,
                                                     real_data.float(),
                                                     fake_data,
                                                     real_label,
                                                     BATCH_SIZE,
                                                     device,
                                                     DIM=13)

            ## Wasserstein-1 distance estimate
            w_dist = torch.mean(disc_fake) - torch.mean(disc_real)
            # final critic cost (WGAN-GP): E[D(fake)] - E[D(real)] + LAMBD * gradient penalty
            disc_cost = torch.mean(disc_fake) - torch.mean(
                disc_real) + LAMBD * gradient_penalty

            with amp.scale_loss(disc_cost, optimizer_d) as scaled_loss:
                scaled_loss.backward()

            optimizer_d.step()

            #--------------Log to COMET ML ----------
            if i == hyperp["ncrit"] - 1:
                experiment.log_metric("L_crit", disc_cost, step=iteration)
                experiment.log_metric("gradient_pen",
                                      gradient_penalty,
                                      step=iteration)
                experiment.log_metric("Wasserstein Dist",
                                      w_dist,
                                      step=iteration)
                if params["train_calib"]:
                    experiment.log_metric("L_const", e_loss, step=iteration)

        #---------------------TRAIN G------------------------
        for p in aD.parameters():
            p.requires_grad_(False)  # freeze D

        for c in aE.parameters():
            c.requires_grad_(False)  # freeze C

        gen_cost = None
        for i in range(hyperp["ngen"]):

            aG.zero_grad()

            noise = np.random.uniform(-1, 1, (BATCH_SIZE, LATENT))
            noise = torch.from_numpy(noise).float()
            noise = noise.view(
                -1, LATENT, 1, 1,
                1)  #[BS, nz]  --> [Bs,nz,1,1,1] Needed for Generator
            noise = noise.to(device)

            batch = next(dataiter, None)

            if batch is None:
                dataiter = iter(dataloader)
                batch = next(dataiter)

            real_label = batch['energy']  ## energy label
            real_label = real_label.to(device)

            noise.requires_grad_(True)

            real_data = batch['shower']  # 48x48x48 calo image
            real_data = real_data.to(device)

            fake_data = aG(noise, real_label.float())
            fake_data = fake_data.unsqueeze(
                1)  ## transform to [BS, 1, 48, 48, 48]

            ## calculate loss function
            gen_cost = aD(fake_data.float(), real_label.float())

            ## label conditioning
            #output_g = aE(fake_data)
            #output_r = aE(real_data.float())

            output_g = 0.0  #for now
            output_r = 0.0  #for now

            aux_fake = (output_g - real_label)**2
            aux_real = (output_r - real_label)**2

            aux_errG = torch.abs(aux_fake - aux_real)

            ## Total loss function for generator
            g_cost = -torch.mean(gen_cost) + KAPPA * torch.mean(aux_errG)

            with amp.scale_loss(g_cost, optimizer_g) as scaled_loss_G:
                scaled_loss_G.backward()

            optimizer_g.step()

            #--------------Log to COMET ML ----------
            experiment.log_metric("L_Gen", g_cost, step=iteration)

            ## plot example image
            if iteration % 100 == 0 or iteration == 1:
                image = fake_data.view(-1, 48, 13, 13).cpu().detach().numpy()
                cmap = mpl.cm.viridis
                cmap.set_bad('white', 1.)
                figExIm = plt.figure(figsize=(6, 6))
                axExIm1 = figExIm.add_subplot(1, 1, 1)
                image1 = np.sum(image[0], axis=0)
                masked_array1 = np.ma.array(image1, mask=(image1 == 0.0))
                im1 = axExIm1.imshow(masked_array1,
                                     filternorm=False,
                                     interpolation='none',
                                     cmap=cmap,
                                     vmin=0.01,
                                     vmax=100,
                                     norm=mpl.colors.LogNorm(),
                                     origin='lower')
                figExIm.patch.set_facecolor('white')
                axExIm1.set_xlabel('y [cells]', family='serif')
                axExIm1.set_ylabel('x [cells]', family='serif')
                figExIm.colorbar(im1)

                experiment.log_figure(figure=plt, figure_name="x-y")

                figExIm = plt.figure(figsize=(6, 6))
                axExIm2 = figExIm.add_subplot(1, 1, 1)
                image2 = np.sum(image[0], axis=1)
                masked_array2 = np.ma.array(image2, mask=(image2 == 0.0))
                im2 = axExIm2.imshow(masked_array2,
                                     filternorm=False,
                                     interpolation='none',
                                     cmap=cmap,
                                     vmin=0.01,
                                     vmax=100,
                                     norm=mpl.colors.LogNorm(),
                                     origin='lower')
                figExIm.patch.set_facecolor('white')
                axExIm2.set_xlabel('y [cells]', family='serif')
                axExIm2.set_ylabel('z [layers]', family='serif')
                figExIm.colorbar(im2)

                experiment.log_figure(figure=plt, figure_name="y-z")

                figExIm = plt.figure(figsize=(6, 6))
                axExIm3 = figExIm.add_subplot(1, 1, 1)
                image3 = np.sum(image[0], axis=2)
                masked_array3 = np.ma.array(image3, mask=(image3 == 0.0))
                im3 = axExIm3.imshow(masked_array3,
                                     filternorm=False,
                                     interpolation='none',
                                     cmap=cmap,
                                     vmin=0.01,
                                     vmax=100,
                                     norm=mpl.colors.LogNorm(),
                                     origin='lower')
                figExIm.patch.set_facecolor('white')
                axExIm3.set_xlabel('x [cells]', family='serif')
                axExIm3.set_ylabel('z [layers]', family='serif')
                figExIm.colorbar(im3)
                #experiment.log_metric("L_aux", aux_errG, step=iteration)
                experiment.log_figure(figure=plt, figure_name="x-z")

                ## E-sum monitoring

                figEsum = plt.figure(figsize=(6, 6 * 0.77 / 0.67))
                axEsum = figEsum.add_subplot(1, 1, 1)
                etot_real = getTotE(real_data.cpu().detach().numpy(),
                                    xbins=13,
                                    ybins=13)
                etot_fake = getTotE(image, xbins=13, ybins=13)

                axEsumReal = axEsum.hist(etot_real,
                                         bins=25,
                                         range=[0, 1500],
                                         weights=np.ones_like(etot_real) /
                                         (float(len(etot_real))),
                                         label="orig",
                                         color='blue',
                                         histtype='stepfilled')

                axEsumFake = axEsum.hist(etot_fake,
                                         bins=25,
                                         range=[0, 1500],
                                         weights=np.ones_like(etot_fake) /
                                         (float(len(etot_fake))),
                                         label="generated",
                                         color='red',
                                         histtype='stepfilled')

                axEsum.text(0.25,
                            0.81,
                            "WGAN",
                            horizontalalignment='left',
                            verticalalignment='top',
                            transform=axEsum.transAxes,
                            color='red')
                axEsum.text(0.25,
                            0.87,
                            'GEANT 4',
                            horizontalalignment='left',
                            verticalalignment='top',
                            transform=axEsum.transAxes,
                            color='blue')

                experiment.log_figure(figure=plt, figure_name="E-sum")

        #end = timer()
        #print(f'---train G elapsed time: {end - start}')

        if params["train_postP"]:
            #---------------------TRAIN P------------------------
            for p in aD.parameters():
                p.requires_grad_(False)  # freeze D

            for c in aG.parameters():
                c.requires_grad_(False)  # freeze G

            lossP = None
            for i in range(1):

                noise = np.random.uniform(-1, 1, (BATCH_SIZE, LATENT))
                noise = torch.from_numpy(noise).float()
                noise = noise.view(
                    -1, LATENT, 1, 1,
                    1)  #[BS, nz]  --> [Bs,nz,1,1,1] Needed for Generator
                noise = noise.to(device)

                batch = next(dataiter, None)

                if batch is None:
                    dataiter = iter(dataloader)
                    batch = next(dataiter)

                real_label = batch['energy']  ## energy label
                real_label = real_label.to(device)

                noise.requires_grad_(True)

                real_data = batch['shower']  # calo image
                real_data = real_data.to(device)

                ## forward pass to generator
                fake_data = aG(noise, real_label.float())
                fake_data = fake_data.unsqueeze(
                    1)  ## transform to [BS, 1, layer, size, size]

                ### first LossD_P
                fake_dataP = aP(fake_data.float(), real_label.float())
                lossD_P = aD(fake_dataP.float(), real_label.float())
                lossD_P = lossD_P.mean()

                ## lossFixP

                real_sorted = real_data.view(BATCH_SIZE, -1)
                fake_sorted = fake_dataP.view(BATCH_SIZE, -1)

                real_sorted, _ = torch.sort(real_sorted,
                                            dim=1,
                                            descending=True)  #.view(900,1)
                fake_sorted, _ = torch.sort(fake_sorted,
                                            dim=1,
                                            descending=True)  #.view(900,1)

                lossFixPp1 = mmd_hit_sortKernel(real_sorted.float(),
                                                fake_sorted,
                                                kernel_size=100,
                                                stride=50,
                                                cutoff=2000,
                                                alpha=200)

                lossFixPp2 = F.mse_loss(fake_dataP.view(BATCH_SIZE, -1),
                                        fake_data.detach().view(
                                            BATCH_SIZE, -1),
                                        reduction='mean')

                lossFixP = wMMD * lossFixPp1 + wMSE * lossFixPp2

                lossP = LDP * lossD_P - lossFixP
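                # lossP combines the critic score on post-processed showers with the fidelity
                # penalty; backward(mone) flips the gradient sign, so the post-processor is pushed
                # to raise the critic score while keeping the MMD/MSE fidelity term small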

                lossP.backward(mone)
                optimizer_p.step()

        if iteration % 100 == 0 or iteration == 1:
            print('iteration: {}, critic loss: {}'.format(
                iteration,
                disc_cost.cpu().data.numpy()))
            if rank == 0:
                torch.save(
                    {
                        'Generator': aG.state_dict(),
                        'Critic': aD.state_dict(),
                        'G_optimizer': optimizer_g.state_dict(),
                        'D_optimizer': optimizer_d.state_dict(),
                        'iteration': iteration
                    }, OUTP + '{0}/wgan_itrs_{1}.pth'.format(EXP, iteration))
                if params["train_calib"]:
                    torch.save(
                        aE.state_dict(),
                        OUTP + '/{0}/netE_itrs_{1}.pth'.format(EXP, iteration))
                if params["train_postP"]:
                    torch.save(
                        aP.state_dict(),
                        OUTP + '{0}/netP_itrs_{1}.pth'.format(EXP, iteration))
Ejemplo n.º 25
0
def train(normal_digit, anomalies, folder, file, p_train, p_test):

    # Create an experiment
    experiment = Experiment(project_name="deep-stats-thesis",
                            workspace="stecaron",
                            disabled=True)
    experiment.add_tag("mnist_conv_ae")

    # General parameters
    DOWNLOAD_MNIST = True
    PATH_DATA = os.path.join(os.path.expanduser("~"), 'Downloads/mnist')
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Define training parameters
    hyper_params = {
        "EPOCH": 75,
        "NUM_WORKERS": 10,
        "BATCH_SIZE": 256,
        "LR": 0.001,
        "TRAIN_SIZE": 4000,
        "TRAIN_NOISE": p_train,
        "TEST_SIZE": 800,
        "TEST_NOISE": p_test,
        # on which class we want to learn outliers
        "CLASS_SELECTED": [normal_digit],
        # which class we want to corrupt our dataset with
        "CLASS_CORRUPTED": anomalies,
        "ALPHA": p_test,
        "MODEL_NAME": "mnist_ae_model",
        "LOAD_MODEL": False,
        "LOAD_MODEL_NAME": "mnist_ae_model"
    }

    # Log experiment parameters
    experiment.log_parameters(hyper_params)

    # Load data
    train_data, test_data = load_mnist(PATH_DATA, download=DOWNLOAD_MNIST)

    # Train the autoencoder
    model = ConvAutoEncoder2()
    optimizer = torch.optim.Adam(model.parameters(), lr=hyper_params["LR"])
    #loss_func = nn.MSELoss()
    loss_func = nn.BCELoss()

    # Build "train" and "test" datasets
    id_maj_train = numpy.random.choice(numpy.where(
        numpy.isin(train_data.train_labels, hyper_params["CLASS_SELECTED"]))[0],
        int((1 - hyper_params["TRAIN_NOISE"]) *
            hyper_params["TRAIN_SIZE"]),
        replace=False)
    id_min_train = numpy.random.choice(numpy.where(
        numpy.isin(train_data.train_labels, hyper_params["CLASS_CORRUPTED"]))[0],
        int(hyper_params["TRAIN_NOISE"] *
            hyper_params["TRAIN_SIZE"]),
        replace=False)
    id_train = numpy.concatenate((id_maj_train, id_min_train))

    id_maj_test = numpy.random.choice(numpy.where(
        numpy.isin(test_data.test_labels, hyper_params["CLASS_SELECTED"]))[0],
        int((1 - hyper_params["TEST_NOISE"]) *
            hyper_params["TEST_SIZE"]),
        replace=False)
    id_min_test = numpy.random.choice(numpy.where(
        numpy.isin(test_data.test_labels, hyper_params["CLASS_CORRUPTED"]))[0],
        int(hyper_params["TEST_NOISE"] *
            hyper_params["TEST_SIZE"]),
        replace=False)
    id_test = numpy.concatenate((id_min_test, id_maj_test))

    train_data.data = train_data.data[id_train]
    train_data.targets = train_data.targets[id_train]

    test_data.data = test_data.data[id_test]
    test_data.targets = test_data.targets[id_test]

    train_data.targets = torch.from_numpy(
        numpy.isin(train_data.train_labels,
                   hyper_params["CLASS_CORRUPTED"])).type(torch.int32)
    test_data.targets = torch.from_numpy(
        numpy.isin(test_data.test_labels,
                   hyper_params["CLASS_CORRUPTED"])).type(torch.int32)

    train_loader = Data.DataLoader(dataset=train_data,
                                   batch_size=hyper_params["BATCH_SIZE"],
                                   shuffle=True,
                                   num_workers=hyper_params["NUM_WORKERS"])

    test_loader = Data.DataLoader(dataset=test_data,
                                  batch_size=test_data.data.shape[0],
                                  shuffle=False,
                                  num_workers=hyper_params["NUM_WORKERS"])
    model.train()
    if hyper_params["LOAD_MODEL"]:
        model = torch.load(hyper_params["LOAD_MODEL_NAME"])
    else:
        train_mnist(train_loader,
                    model,
                    criterion=optimizer,
                    n_epoch=hyper_params["EPOCH"],
                    experiment=experiment,
                    device=device,
                    model_name=hyper_params["MODEL_NAME"],
                    loss_func=loss_func,
                    loss_type="binary")

    # Compute p-values
    model.to(device)
    pval, test_errors = compute_reconstruction_pval(
        train_loader, model, test_loader, device)
    pval_order = numpy.argsort(pval)

    # Plot p-values
    x_line = numpy.arange(0, len(test_data), step=1)
    y_line = numpy.linspace(0, 1, len(test_data))
    y_adj = numpy.arange(0, len(test_data),
                         step=1) / len(test_data) * hyper_params["ALPHA"]
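    # y_adj traces the Benjamini-Hochberg critical values (rank / N * alpha), drawn below
    # as the red rejection-threshold line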
    zoom = int(0.2 * len(test_data))  # number of points shown in the zoomed-in panel

    #index = numpy.isin(test_data.test_labels, hyper_params["CLASS_CORRUPTED"]).astype(int)
    index = numpy.array(test_data.targets).astype(int)

    fig, (ax1, ax2) = plt.subplots(2, 1)

    ax1.scatter(numpy.arange(0, len(pval), 1),
                pval[pval_order],
                c=index[pval_order].reshape(-1))
    ax1.plot(x_line, y_line, color="green")
    ax1.plot(x_line, y_adj, color="red")
    ax1.set_title(
        f'Entire test dataset with {int(hyper_params["TEST_NOISE"] * 100)}% of noise'
    )
    ax1.set_xticklabels([])

    ax2.scatter(numpy.arange(0, zoom, 1),
                pval[pval_order][0:zoom],
                c=index[pval_order].reshape(-1)[0:zoom])
    ax2.plot(x_line[0:zoom], y_line[0:zoom], color="green")
    ax2.plot(x_line[0:zoom], y_adj[0:zoom], color="red")
    ax2.set_title('Zoomed in')
    ax2.set_xticklabels([])

    experiment.log_figure(figure_name="empirical_test_hypothesis",
                          figure=fig,
                          overwrite=True)
    plt.savefig(os.path.join(folder, "pvalues_" + file + ".png"))
    plt.show()

    # Compute some stats
    precision, recall, f1_score, average_precision, roc_auc = test_performances(
        pval, index, hyper_params["ALPHA"])
    print(f"Precision: {precision}")
    print(f"Recall: {recall}")
    print(f"F1 Score: {f1_score}")
    print(f"AUC: {roc_auc}")
    print(f"Average Precison: {average_precision}")
    experiment.log_metric("precision", precision)
    experiment.log_metric("recall", recall)
    experiment.log_metric("f1_score", f1_score)
    experiment.log_metric("auc", roc_auc)
    experiment.log_metric("average_precision", average_precision)

    # Show some examples

    fig, axs = plt.subplots(5, 5)
    fig.tight_layout()
    axs = axs.ravel()

    for i in range(25):
        image = test_data.data[pval_order[i]]
        axs[i].imshow(image, cmap='gray')
        axs[i].axis('off')

    experiment.log_figure(figure_name="rejetcted_observations",
                          figure=fig,
                          overwrite=True)
    plt.show()

    fig, axs = plt.subplots(5, 5)
    fig.tight_layout()
    axs = axs.ravel()

    for i in range(25):
        image = test_data.data[pval_order[int(len(pval) - 1) - i]]
        axs[i].imshow(image, cmap='gray')
        axs[i].axis('off')

    experiment.log_figure(figure_name="better_observations",
                          figure=fig,
                          overwrite=True)
    plt.show()

    # Save the results in the output file
    col_names = ["timestamp", "precision", "recall", "f1_score",
                 "average_precision", "auc"]
    results_file = os.path.join(folder, "results_" + file + ".csv")
    if os.path.exists(results_file):
        df_results = pandas.read_csv(results_file, names=col_names, header=0)
    else:
        df_results = pandas.DataFrame(columns=col_names)

    df_results = df_results.append(
        pandas.DataFrame(
            numpy.concatenate(
                (numpy.array(
                    datetime.datetime.fromtimestamp(
                        time.time()).strftime('%Y-%m-%d %H:%M:%S')).reshape(1),
                 precision.reshape(1), recall.reshape(1),
                 f1_score.reshape(1), average_precision.reshape(1),
                 roc_auc.reshape(1))).reshape(1, -1), columns=col_names), ignore_index=True)

    df_results.to_csv(results_file)
Ejemplo n.º 26
0
                       data,
                       teach=True)
learn.model, net = learn.model.to(args.gpu), net.to(args.gpu)

teacher = learn.model

sf_student, sf_teacher = get_features(net, teacher, experiment=expt)
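# (assumption) get_features pairs up intermediate feature maps of the student and teacher
# networks for the MSE feature-matching loss used in the training loop below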

if args.api_key:
    project_name = expt + '-' + hyper_params['model'] + '-' + hyper_params[
        'dataset']
    experiment = Experiment(api_key=args.api_key,
                            project_name=project_name,
                            workspace=args.workspace)
    experiment.log_parameters(hyper_params)

optimizer = torch.optim.Adam(net.parameters(),
                             lr=hyper_params["learning_rate"])
loss_function2 = nn.MSELoss()
loss_function = nn.CrossEntropyLoss()
savename = get_savename(hyper_params, experiment=expt)
best_val_acc = 0
for epoch in range(hyper_params['num_epochs']):
    student, train_loss, val_loss, val_acc, best_val_acc = train(
        net, teacher, data, sf_teacher, sf_student, loss_function,
        loss_function2, optimizer, hyper_params, epoch, savename, best_val_acc)
    if args.api_key:
        experiment.log_metric("train_loss", train_loss)
        experiment.log_metric("val_loss", val_loss)
        experiment.log_metric("val_acc", val_acc * 100)
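# A minimal sketch (hypothetical; the actual train() helper defined elsewhere in
# this script may differ) of the kind of objective implied by the setup above:
# cross-entropy on the student logits plus an MSE term between the student and
# teacher features captured through the sf_student / sf_teacher hooks.
#
#     def distillation_loss(student_logits, labels, student_feat, teacher_feat):
#         ce = loss_function(student_logits, labels)                  # nn.CrossEntropyLoss
#         mse = loss_function2(student_feat, teacher_feat.detach())   # nn.MSELoss
#         return ce + mse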
Ejemplo n.º 27
0
                    add_to_feed=feed_means_stds,
                    minibatch_size=minibatch_size,
                    save_after=10,
                    save_path=_path_sav_fold("models"))

    plot_cost_name = "{}_nll_costs".format(exp_name)
    costs = [float(c) for c in costs]
    plot_cost_graph(plot_cost_name, costs,
                    "{}.png".format(_path_sav_fold(plot_cost_name)))

    logger.info("--> Duration: {:.4f}".format(timers.tac()))

    # Comet.ml
    if use_comet:
        for cost in costs:
            experiment.log_metric("cost", cost)
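        # A sketch of an alternative (not in the original script): passing an
        # explicit step index ties each logged value to its position, matching
        # how other examples here call log_metric(..., step=...):
        #     for i, cost in enumerate(costs):
        #         experiment.log_metric("cost", cost, step=i)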

elif training_type == "adversarial":
    timers.tic()
    logger.info("Compiling adversarial model.")

    forward, backward_masks = spn.compile_adversarial(
        learning_rate=learning_rate)

    logger.info("--> Duration: {:.4f}".format(timers.tac()))

    timers.tic()
    logger.info("Fitting.")

    feed_means_stds = {
        spn.leaf_layer.means: leaf_means,
Ejemplo n.º 28
0
                    adam_stop = True
    return eval_acc


"""
Train model on Natural Language Inference task
"""
epoch = 1

#nli_net.load_state_dict(torch.load(os.path.join(params.outputdir, params.criticmodelname)))
#print("\nCritic Loaded")

while not stop_training and epoch <= params.n_epochs:
    with experiment.train():
        train_accc, train_losss = trainepoch(epoch, RL_train=False)
        experiment.log_metric("Train Accuracy", train_accc, step=epoch)
        experiment.log_metric("Train Loss",
                              sum(train_losss) / len(train_losss),
                              step=epoch)
    with experiment.test():
        eval_accc = evaluate(epoch, 'valid')
        experiment.log_metric("Validation Accuracy", eval_accc, step=epoch)
    epoch += 1

# Run best model on test set.
nli_net.load_state_dict(
    torch.load(os.path.join(params.outputdir, params.criticmodelname)))
print("\nCritic Loaded")
#actorModel.load_state_dict(torch.load(os.path.join(params.outputdir, params.actormodelname)))
#print("\nActor Loaded")
#print(evaluate(epoch, 'train'))
Ejemplo n.º 29
0
        global_step = 0
        for epoch in range(1, num_epochs + 1):
            print("Epoch: {}/{}".format(epoch, num_epochs))

            with experiment.train():
                for train_step in range(train_steps):
                    global_step += 1

                    # Perform training step on batch and record metrics
                    loss, accuracy = model.train_on_batch(
                        train_text[train_step], train_labels[train_step])
                    train_loss.append(loss)
                    train_accuracy.append(accuracy)

                    experiment.log_metric('loss',
                                          np.mean(train_loss),
                                          step=global_step)
                    experiment.log_metric('accuracy',
                                          np.mean(train_accuracy),
                                          step=global_step)
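                    # Note: train_loss and train_accuracy keep accumulating
                    # across steps, so the values logged above are running
                    # means over the epoch so far rather than per-batch values.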

                    # Every evaluate_steps evaluate model on validation set
                    if (train_step + 1) % evaluate_steps == 0 or (
                            train_step + 1) == train_steps:
                        with experiment.validate():
                            for val_step in range(val_steps):

                                # Perform evaluation step on batch and record metrics
                                loss, accuracy = model.test_on_batch(
                                    val_text[val_step], val_labels[val_step])
                                val_loss.append(loss)
Ejemplo n.º 30
0
					c_loss = mixup_criterion(c_obj_fn, code, m_label_a, m_label_b, lam)
				else:
					code, output = model(m_batch)
					cce_loss = criterion(output, m_label)
					c_loss = c_obj_fn(code, m_label)
				loss = cce_loss + (parser['c_loss_weight'] * c_loss)

				#print(loss)
				optimizer.zero_grad()
				loss.backward()
				for param in c_obj_fn.parameters():
					param.grad.data *= (parser['c_loss_lr'] / (parser['c_loss_weight'] * parser['lr']))
				optimizer.step()
				pbar.set_description('epoch: %d loss: %.3f'%(epoch, loss))
				pbar.update(1)
		experiment.log_metric('trn_loss', loss)
		#lr_scheduler.step()

		#validation phase
		model.eval()
		with torch.set_grad_enabled(False):
			embeddings_dev = []
			data_y_dev = []
			with tqdm(total = len(devset_gen), ncols = 70) as pbar:
				for m_batch, m_label in devset_gen:
					m_batch = m_batch.to(device)
					code, _ = model(m_batch)
					m_label = list(m_label.numpy())
					embeddings_dev.extend(list(code.cpu().numpy())) #>>> (16, 64?)
					data_y_dev.extend(m_label)
					pbar.set_description('epoch%d: Extract ValEmbeddings'%(epoch))
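# The snippet above relies on a mixup_criterion helper defined elsewhere in the
# script. For reference, the conventional definition matches the call signature
# used here (a sketch; the original helper may differ):
#
#     def mixup_criterion(criterion, pred, y_a, y_b, lam):
#         return lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b)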
Ejemplo n.º 31
0
class Experiment:
    """
        A helper class to facilitate the training and validation procedure of the GoTurnRemix model

        Parameters
        ----------
        learning_rate: float
            Learning rate to train the model. The optimizer is SGD and the loss is L1 Loss
        image_size: int
            The size of the input image. This has to be fixed before the data is created
        data_path: Path
            Path to the data folder. If the folder name includes "pickle", then the data saved as pickles are loaded
        augment: bool
            Perform augmentation on the images before training
        logs_path: Path
            Path to save the validation predictions at the end of each epoch
        models_path: Path
            Path to save the model state at the end of each epoch
        save_name: str
            Name of the folder in which the logs and models are saved. If not provided, the current datetime is used
        comet_api: str
            API key for Comet.ml. If provided, the learning rate, image size, augmentation flag and training metrics are logged to a Comet experiment
    """
    def __init__(self,
                 learning_rate: float,
                 image_size: int,
                 data_path: Path,
                 augment: bool = True,
                 logs_path: Path = None,
                 models_path: Path = None,
                 save_name: str = None,
                 comet_api: str = None):
        self.image_size = image_size
        self.logs_path = logs_path
        self.models_path = models_path
        self.model = GoTurnRemix()
        self.model.cuda()
        self.criterion = torch.nn.L1Loss()
        self.optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad,
                                                self.model.parameters()),
                                         lr=learning_rate)
        self.model_name = str(datetime.datetime.now()).split('.')[0].replace(
            ':', '-').replace(' ', '-')
        self.model_name = save_name if save_name else self.model_name
        self.augment = augment
        self.data = Data(data_path,
                         target_size=self.image_size,
                         transforms=augment)
        self.comet = None
        if comet_api:
            self.comet = Comet(api_key=comet_api)
            self.comet.log_parameter('learning_rate', learning_rate)
            self.comet.log_parameter('image_size', image_size)
            self.comet.log_parameter('augment', augment)

    def __train_step__(self, data):
        """
        Performs one step of the training procedure

        Parameters
        ----------
        data
            data obtained from @Data.__getitem__

        Returns
        -------
           Loss at the end of training step
        """
        if self.comet:
            self.comet.train()
        previous_cropped, current_cropped, bbox, scale, crop = data
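        # Scale pixel values from [0, 255] to [0, 1] and move both crops to the GPU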
        previous_cropped = torch.div(previous_cropped, 255).float().cuda()
        current_cropped = torch.div(current_cropped, 255).float().cuda()
        previous_cropped = torch.autograd.Variable(previous_cropped,
                                                   requires_grad=True)
        current_cropped = torch.autograd.Variable(current_cropped,
                                                  requires_grad=True)
        bbox = bbox.requires_grad_(True).float().cuda()
        self.optimizer.zero_grad()
        preds = self.model(previous_cropped, current_cropped)

        del previous_cropped
        del current_cropped
        gc.collect()

        loss = self.criterion(preds, bbox)
        if self.comet:
            self.comet.log_metric('loss', loss)
        loss.backward()
        self.optimizer.step()
        return loss

    def __test__(self):
        """
        Test tracking of the model

        Returns
        -------
            Test loss and test predictions
        """
        # Set model to evaluation mode
        if self.comet:
            self.comet.test()
        self.model.eval()
        test_preds = []
        test_loss = []
        video_frames = self.data.video_frames[-1]
        video_annotations = self.data.video_annotations[-1]
        p_a = video_annotations[0]
        p_f = video_frames[0]
        test_preds.append(p_a)

        for i in tqdm(range(1, len(video_annotations)), desc='Testing'):
            c_a = video_annotations[i]
            c_f = video_frames[i]
            p_c, c_c, bbox, scale, crop = self.data.make_crops(
                p_f, c_f, p_a, c_a)
            p_c = torch.div(torch.from_numpy(p_c),
                            255).unsqueeze(0).float().cuda()
            c_c = torch.div(torch.from_numpy(c_c),
                            255).unsqueeze(0).float().cuda()
            bbox = torch.tensor(bbox, requires_grad=False).float().cuda()
            preds = self.model(p_c, c_c)

            del p_c
            del c_c
            gc.collect()

            loss = torch.nn.functional.l1_loss(preds, bbox)
            if self.comet:
                self.comet.log_metric('test_loss', loss)
            test_loss.append(loss.item())
            preds = self.data.get_bbox(preds.cpu().detach().numpy()[0],
                                       self.image_size, scale, crop)
            test_preds.append(preds)
            p_a = preds
            p_f = c_f
        return test_loss, test_preds

    def __validate__(self):
        """
        Performs validation on the model

        Returns
        -------
            Validation loss and validation predictions
        """
        # Set model to evaluation mode
        if self.comet:
            self.comet.validate()
        self.model.eval()
        validation_preds = []
        validation_loss = []
        video_frames = self.data.video_frames[-1]
        video_annotations = self.data.video_annotations[-1]
        p_a = video_annotations[0]
        p_f = video_frames[0]
        validation_preds.append(p_a)

        for i in tqdm(range(1, len(video_annotations)), desc='Validating'):
            c_a = video_annotations[i]
            c_f = video_frames[i]
            p_c, c_c, bbox, scale, crop = self.data.make_crops(
                p_f, c_f, p_a, c_a)
            p_c = torch.div(torch.from_numpy(p_c),
                            255).unsqueeze(0).float().cuda()
            c_c = torch.div(torch.from_numpy(c_c),
                            255).unsqueeze(0).float().cuda()
            bbox = torch.tensor(bbox, requires_grad=False).float().cuda()
            preds = self.model(p_c, c_c)

            del p_c
            del c_c
            gc.collect()

            loss = torch.nn.functional.l1_loss(preds, bbox)
            if self.comet:
                self.comet.log_metric('val_loss', loss)
            validation_loss.append(loss.item())
            preds = self.data.get_bbox(preds.cpu().detach().numpy()[0],
                                       self.image_size, scale, crop)
            validation_preds.append(preds)
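            # Unlike __test__, validation feeds the ground-truth box back in as
            # the previous annotation (teacher forcing) rather than the model's
            # own prediction.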
            p_a = c_a
            p_f = c_f
        return validation_loss, validation_preds

    def train(self,
              epochs: int,
              batch_size: int,
              validate: bool = True,
              test: bool = True):
        """
        Trains the model for @epochs number of epochs

        Parameters
        ----------
        epochs: int
            Number of epochs to train the model
        batch_size: int
            The size of each batch when training the model
        validate: bool, default=True
            If True, validation occurs at the end of each epoch
            The results are saved in @logs_path and models are saved in @models_path
        test: bool, default=True
            If True, the model is tested for tracking at the end of the training procedure
            The results are saved in @logs_path

        Returns
        -------
            list: List containing the training loss at the end of each epoch
        """
        if self.comet:
            self.comet.log_parameter('epochs', epochs)
            self.comet.log_parameter('batch_size', batch_size)
        loss_per_epoch = []
        preds_per_epoch = []
        # Set the model to training mode
        self.model.train()
        # Create a DataLoader to feed data to the model
        dataloader = torch.utils.data.DataLoader(dataset=self.data,
                                                 batch_size=batch_size,
                                                 shuffle=True)

        # Run for @epochs number of epochs
        for epoch in range(epochs):
            if self.comet:
                self.comet.log_metric('epoch', epoch)
            running_loss = []
            for step, data in enumerate(
                    tqdm(dataloader,
                         total=int(len(self.data) / batch_size),
                         desc='Epoch {}'.format(epoch))):
                loss = self.__train_step__(data)
                running_loss.append(loss.item())
            training_loss = sum(running_loss) / len(running_loss)
            if self.comet:
                self.comet.log_metric('mean_train_loss', training_loss)
            loss_per_epoch.append(training_loss)
            if validate:
                validation_loss, validation_preds = self.__validate__()
                mean_validation_loss = sum(validation_loss) / len(validation_loss)
                if self.comet:
                    self.comet.log_metric('mean_validation_loss',
                                          mean_validation_loss)
                preds_per_epoch.append(validation_preds)
                print('Validation loss: {}'.format(mean_validation_loss))
            # Save the model at this stage
            if self.models_path:
                (self.models_path / self.model_name).mkdir(exist_ok=True)
                torch.save(self.model, (self.models_path / self.model_name /
                                        'epoch_{}'.format(epoch)).resolve())
            print('Training Loss: {}'.format(training_loss))
        # Save the validation frames, ground truths and predictions at this stage
        if self.logs_path:
            (self.logs_path / self.model_name).mkdir(exist_ok=True)
            save = {
                'frames': self.data.video_frames[-1],
                'truth': self.data.video_annotations[-1],
                'preds': preds_per_epoch
            }
            np.save(
                str((self.logs_path / self.model_name /
                     'preds_per_epoch.npy').resolve()), save)
        # Test the model and save the results
        if test:
            test_loss, test_preds = self.__test__()
            if self.logs_path:
                (self.logs_path / self.model_name).mkdir(exist_ok=True)
                save = {
                    'frames': self.data.video_frames[-1],
                    'truth': self.data.video_annotations[-1],
                    'preds': test_preds,
                    'loss': test_loss
                }
                np.save(
                    str((self.logs_path / self.model_name /
                         'test_preds.npy').resolve()), save)
        return loss_per_epoch
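# A minimal usage sketch for the helper class above. The paths, hyperparameter
# values and API key below are placeholders, not values from the original project:
#
#     exp = Experiment(learning_rate=1e-5,
#                      image_size=224,
#                      data_path=Path('data/pickle'),
#                      augment=True,
#                      logs_path=Path('logs'),
#                      models_path=Path('models'),
#                      save_name='goturn_remix_run',
#                      comet_api=None)
#     loss_per_epoch = exp.train(epochs=10, batch_size=32, validate=True, test=True)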