Example #1
def classification_interface(model, data_loader_train, data_loader_val, data_loader_test,
                             latent_size, latent_range, device, epoch_number, number_of_classes=3):
    # latent factors: 0 - color, 1 - shape, 2 - scale (from 0.5 to 1.0),
    # 3,4 - orientation (cos, sin), 5,6 - position (from 0 to 1)
    classificator = SimpleNet(latent_size=latent_size, number_of_classes=number_of_classes)
    classificator.to(device)
    metric = classification(model, classificator, data_loader_train, data_loader_val,
                            data_loader_test, latent_range, device, epoch_number)
    return metric
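A minimal call-site sketch for the helper above, assuming a trained AutoEncoder `model`, the three loaders, and `device` already exist in the surrounding script; the latent size, range, and epoch count below are illustrative, not taken from the source:

# hypothetical usage; concrete argument values are assumptions
metric = classification_interface(model,
                                  data_loader_train,
                                  data_loader_val,
                                  data_loader_test,
                                  latent_size=12,    # assumed to match the AutoEncoder
                                  latent_range=[0],  # illustrative: classify on latent dim 0
                                  device=device,
                                  epoch_number=10)
print('classification metric:', metric)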
Example #2
def regression_interface(model, data_loader_train, data_loader_val, data_loader_test,
                         latent_size, latent_range, min_value, max_value, device, epoch_number):
    # latent factors: 0 - color, 1 - shape, 2 - scale (from 0.5 to 1.0),
    # 3,4 - orientation (cos, sin), 5,6 - position (from 0 to 1)
    regressor = SimpleNet(latent_size=latent_size, number_of_classes=len(latent_range))
    regressor.to(device)
    metric = regression(model, regressor, data_loader_train, data_loader_val, data_loader_test,
                        latent_range, device, min_value, max_value, epoch_number)
    return metric
Example #4
class Trainer:
    def __init__(self, device, trainData, validData, hidden_size, lr,
                 batch_size, arch):
        self.device = device
        self.trainData = trainData
        self.validData = validData
        self.model = SimpleNet(hidden_size).to(device)
        self.generator = Generator(hidden_size).to(device)
        self.discriminator = Discriminator(hidden_size).to(device)
        self.opt = torch.optim.Adam(self.model.parameters(), lr=1e-3)
        self.opt_G = torch.optim.Adam(self.generator.parameters(), lr=1e-4)
        self.opt_D = torch.optim.Adam(self.discriminator.parameters(), lr=1e-4)
        self.criterion = torch.nn.BCEWithLogitsLoss()
        self.scheduler = StepLR(self.opt, step_size=150, gamma=0.5)
        self.scheduler_G = StepLR(self.opt_G, step_size=300, gamma=0.5)
        self.scheduler_D = StepLR(self.opt_D, step_size=300, gamma=0.5)
        self.batch_size = batch_size
        self.arch = arch
        self.history = {'train': [], 'valid': []}

    def run_epoch(self, epoch, training):
        self.model.train(training)
        self.generator.train(training)
        self.discriminator.train(training)

        if training:
            description = 'Train'
            dataset = self.trainData
            shuffle = True
        else:
            description = 'Valid'
            dataset = self.validData
            shuffle = False
        dataloader = DataLoader(dataset=dataset,
                                batch_size=self.batch_size,
                                shuffle=shuffle,
                                collate_fn=dataset.collate_fn,
                                num_workers=4)

        trange = tqdm(enumerate(dataloader),
                      total=len(dataloader),
                      desc=description,
                      ascii=True)

        g_loss = 0
        d_loss = 0
        loss = 0
        accuracy = Accuracy()

        for i, (features, real_missing, labels) in trange:

            features = features.to(self.device)  # (batch, 11)
            real_missing = real_missing.to(self.device)  # (batch, 3)
            labels = labels.to(self.device)  # (batch, 1)
            batch_size = features.shape[0]

            if training:
                # augmentation: uniform noise in [-0.5, 0.5), scaled per sample by its feature std
                rand = torch.rand((batch_size, 11)).to(self.device) - 0.5
                std = features.std(dim=1)
                noise = rand * std.unsqueeze(1)
                features += noise

            # Adversarial ground truths
            valid = torch.FloatTensor(batch_size, 1).fill_(1.0).to(
                self.device)  # (batch, 1)
            fake = torch.FloatTensor(batch_size, 1).fill_(0.0).to(
                self.device)  # (batch, 1)

            # ---------------------
            #  Train Discriminator
            # ---------------------

            if i % 10 < 5 or not training:
                real_pred = self.discriminator(real_missing)
                d_real_loss = self.criterion(real_pred, valid)

                fake_missing = self.generator(features.detach())
                fake_pred = self.discriminator(fake_missing.detach())  # keep G fixed while updating D
                d_fake_loss = self.criterion(fake_pred, fake)
                batch_d_loss = (d_real_loss + d_fake_loss)

                if training:
                    self.opt_D.zero_grad()
                    batch_d_loss.backward()
                    self.opt_D.step()
                d_loss += batch_d_loss.item()

            # -----------------
            #  Train Generator
            # -----------------

            if i % 10 >= 5 or not training:
                gen_missing = self.generator(features.detach())
                validity = self.discriminator(gen_missing)
                batch_g_loss = self.criterion(validity, valid)

                if training:
                    self.opt_G.zero_grad()
                    batch_g_loss.backward()
                    self.opt_G.step()
                g_loss += batch_g_loss.item()

                # ------------------
                #  Train Classifier
                # ------------------

                gen_missing = self.generator(features.detach())  # regenerate with the just-updated G
                all_features = torch.cat((features, gen_missing), dim=1)
                o_labels = self.model(all_features)
                batch_loss = self.criterion(o_labels, labels)

                if training:
                    self.opt.zero_grad()
                    batch_loss.backward()
                    self.opt.step()
                loss += batch_loss.item()
                accuracy.update(o_labels, labels)

                trange.set_postfix(accuracy=accuracy.print_score(),
                                   g_loss=g_loss / (i + 1),
                                   d_loss=d_loss / (i + 1),
                                   loss=loss / (i + 1))

        if training:
            self.history['train'].append({
                'accuracy': accuracy.get_score(),
                'g_loss': g_loss / len(trange),
                'd_loss': d_loss / len(trange),
                'loss': loss / len(trange)
            })
            self.scheduler.step()
            self.scheduler_G.step()
            self.scheduler_D.step()
        else:
            self.history['valid'].append({
                'accuracy': accuracy.get_score(),
                'g_loss': g_loss / len(trange),
                'd_loss': d_loss / len(trange),
                'loss': loss / len(trange)
            })

    def save(self, epoch):
        if not os.path.exists(self.arch):
            os.makedirs(self.arch)

        path = self.arch + '/model.pkl.' + str(epoch)
        torch.save(
            {
                'model': self.model.state_dict(),
                'generator': self.generator.state_dict(),
                'discriminator': self.discriminator.state_dict()
            }, path)
        with open(self.arch + '/history.json', 'w') as f:
            json.dump(self.history, f, indent=4)
Example #5
def experiment(logdir: Path, device: str):
    tb_logdir = logdir / "tensorboard"

    seed_all()
    model = SimpleNet().to(device)
    optimizer = optim.AdamW(model.parameters(), lr=1e-3)
    criterion = nn.CrossEntropyLoss()

    train_loader, valid_loader = get_loaders("")

    with TensorboardLogger(tb_logdir) as tb:
        stage = "stage0"
        n_epochs = 10

        checkpointer = CheckpointManager(
            logdir=logdir / stage,
            metric="accuracy",
            metric_minimization=False,
            save_n_best=3,
        )

        for ep in range(1, n_epochs + 1):
            print(f"[Epoch {ep}/{n_epochs}]")
            train_loss, train_acc = train_fn(
                model, train_loader, device, criterion, optimizer
            )
            valid_loss, valid_acc = valid_fn(model, valid_loader, device, criterion)

            # log metrics
            tb.metric(f"{stage}/loss", {"train": train_loss, "valid": valid_loss}, ep)
            tb.metric(
                f"{stage}/accuracy", {"train": train_acc, "valid": valid_acc}, ep,
            )

            epoch_metrics = {
                "train_loss": train_loss,
                "train_accuracy": train_acc,
                "valid_loss": valid_loss,
                "valid_accuracy": valid_acc,
            }

            # store checkpoints
            checkpointer.process(
                score=valid_acc,
                epoch=ep,
                checkpoint=make_checkpoint(
                    stage, ep, model, optimizer, metrics=epoch_metrics,
                ),
            )

            print()
            print(f"            train loss - {train_loss:.5f}")
            print(f"train dataset accuracy - {train_acc:.5f}")
            print(f"            valid loss - {valid_loss:.5f}")
            print(f"valid dataset accuracy - {valid_acc:.5f}")
            print()

        # do a next training stage
        stage = "stage1"
        n_epochs = 10
        print(f"\n\nStage - {stage}")

        checkpointer = CheckpointManager(
            logdir=logdir / stage,
            metric="accuracy",
            metric_minimization=False,
            save_n_best=3,
        )

        load_checkpoint(logdir / "stage0" / "best.pth", model)
        optimizer = optim.Adam(model.parameters(), lr=1e-4 / 2)

        for ep in range(1, n_epochs + 1):
            print(f"[Epoch {ep}/{n_epochs}]")
            train_loss, train_acc = train_fn(
                model, train_loader, device, criterion, optimizer
            )
            valid_loss, valid_acc = valid_fn(model, valid_loader, device, criterion)

            # log metrics
            tb.metric(f"{stage}/loss", {"train": train_loss, "valid": valid_loss}, ep)
            tb.metric(
                f"{stage}/accuracy", {"train": train_acc, "valid": valid_acc}, ep,
            )

            epoch_metrics = {
                "train_loss": train_loss,
                "train_accuracy": train_acc,
                "valid_loss": valid_loss,
                "valid_accuracy": valid_acc,
            }

            # store checkpoints
            checkpointer.process(
                score=valid_acc,
                epoch=ep,
                checkpoint=make_checkpoint(
                    stage, ep, model, optimizer, metrics=epoch_metrics,
                ),
            )

            print()
            print(f"            train loss - {train_loss:.5f}")
            print(f"train dataset accuracy - {train_acc:.5f}")
            print(f"            valid loss - {valid_loss:.5f}")
            print(f"valid dataset accuracy - {valid_acc:.5f}")
            print()

        load_checkpoint(logdir / "stage1" / "best.pth", model)
Example #6
def experiment(rank, world_size, logdir):
    """Experiment flow.

    Args:
        rank (int): process rank
        world_size (int): world size
        logdir (pathlib.Path): directory with logs
    """
    # preparations
    torch.cuda.set_device(rank)
    setup(rank, world_size)
    logdir = Path(logdir) if isinstance(logdir, str) else logdir
    tb_logdir = logdir / "tensorboard"

    main_metric = "accuracy"
    minimize_metric = False

    def log(text):
        if rank == 0:
            print(text)

    train_loader, valid_loader = get_loaders("", rank, world_size)
    world_setup = (rank, world_size)

    train_batch_cnt = 0
    valid_batch_cnt = 0

    with TensorboardLogger(str(tb_logdir), write_to_disk=(rank == 0)) as tb:
        stage = "stage0"
        n_epochs = 2
        log(f"Stage - {stage}")

        seed_all()
        model = SimpleNet()
        model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
        log("Used sync batchnorm")

        model = model.to(rank)
        model = nn.parallel.DistributedDataParallel(model, device_ids=[rank])
        optimizer = optim.AdamW(model.parameters(), lr=1e-3)
        criterion = nn.CrossEntropyLoss()

        checkpointer = CheckpointManager(
            logdir=logdir / stage,
            metric=main_metric,
            metric_minimization=minimize_metric,
            save_n_best=3,
        )

        for ep in range(1, n_epochs + 1):
            log(f"[Epoch {ep}/{n_epochs}]")
            train_metrics = train_fn(
                model,
                train_loader,
                world_setup,
                criterion,
                optimizer,
                tb_logger=tb,
                last_iteration_index=train_batch_cnt,
            )
            if rank == 0:
                tb.add_scalars(f"{stage}/train", train_metrics, ep)
            train_batch_cnt += len(train_loader)

            valid_metrics = valid_fn(
                model,
                valid_loader,
                world_setup,
                criterion,
                tb_logger=tb,
                last_iteration_index=valid_batch_cnt,
            )
            valid_batch_cnt += len(valid_loader)
            if rank == 0:
                tb.add_scalars(f"{stage}/valid", valid_metrics, ep)

                # store checkpoints
                checkpointer.process(
                    score=valid_metrics[main_metric],
                    epoch=ep,
                    checkpoint=make_checkpoint(
                        stage,
                        ep,
                        model,
                        optimizer,
                        metrics={
                            "train": train_metrics,
                            "valid": valid_metrics
                        },
                    ),
                )

            log("[{}/{}] train: loss - {}, accuracy - {}".format(
                ep, n_epochs, train_metrics["loss"],
                train_metrics["accuracy"]))
            log("[{}/{}] valid: loss - {}, accuracy - {}".format(
                ep, n_epochs, valid_metrics["loss"],
                valid_metrics["accuracy"]))

        # do a next training stage
        stage = "stage1"
        n_epochs = 3
        log("*" * 100)
        log(f"Stage - {stage}")

        # wait other processes
        dist.barrier()

        model = SimpleNet()
        load_checkpoint(logdir / "stage0" / "best.pth", model, verbose=True)
        model = nn.SyncBatchNorm.convert_sync_batchnorm(model)

        model = model.to(rank)
        model = nn.parallel.DistributedDataParallel(model, device_ids=[rank])
        optimizer = optim.Adam(model.parameters(), lr=1e-4 / 2)

        checkpointer = CheckpointManager(
            logdir=logdir / stage,
            metric=main_metric,
            metric_minimization=minimize_metric,
            save_n_best=3,
        )

        for ep in range(1, n_epochs + 1):
            log(f"[Epoch {ep}/{n_epochs}]")
            train_metrics = train_fn(
                model,
                train_loader,
                world_setup,
                criterion,
                optimizer,
                tb_logger=tb,
                last_iteration_index=train_batch_cnt,
            )
            if rank == 0:
                tb.add_scalars(f"{stage}/train", train_metrics, ep)
            train_batch_cnt += len(train_loader)

            valid_metrics = valid_fn(
                model,
                valid_loader,
                world_setup,
                criterion,
                tb_logger=tb,
                last_iteration_index=valid_batch_cnt,
            )
            valid_batch_cnt += len(valid_loader)
            if rank == 0:
                tb.add_scalars(f"{stage}/valid", valid_metrics, ep)

                # store checkpoints
                checkpointer.process(
                    score=valid_metrics[main_metric],
                    epoch=ep,
                    checkpoint=make_checkpoint(
                        stage,
                        ep,
                        model,
                        optimizer,
                        metrics={
                            "train": train_metrics,
                            "valid": valid_metrics
                        },
                    ),
                )

            log("[{}/{}] train: loss - {}, accuracy - {}".format(
                ep, n_epochs, train_metrics["loss"],
                train_metrics["accuracy"]))
            log("[{}/{}] valid: loss - {}, accuracy - {}".format(
                ep, n_epochs, valid_metrics["loss"],
                valid_metrics["accuracy"]))

    cleanup()
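The example relies on `setup`, `cleanup`, and a process launcher defined elsewhere; a minimal sketch of what they typically look like with torch.distributed, where the backend, address, and port are assumptions:

import os
from pathlib import Path

import torch
import torch.distributed as dist
import torch.multiprocessing as mp

def setup(rank, world_size):
    # assumed single-node rendezvous; address and port are illustrative
    os.environ['MASTER_ADDR'] = '127.0.0.1'
    os.environ['MASTER_PORT'] = '29500'
    dist.init_process_group('nccl', rank=rank, world_size=world_size)

def cleanup():
    dist.destroy_process_group()

if __name__ == '__main__':
    world_size = torch.cuda.device_count()
    # mp.spawn passes the process rank as the first argument to `experiment`
    mp.spawn(experiment, args=(world_size, Path('logs')), nprocs=world_size)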
Example #7
train, test = shuju_split_contact(shuju_name)
train_data = torch.utils.data.ConcatDataset(train)
test_data = torch.utils.data.ConcatDataset(test)
train_batch_size = 64
test_batch_size = 128
learning_rate = 0.01
num_epoches = 30
train_loader = torch.utils.data.DataLoader(dataset=train_data,
                                           batch_size=train_batch_size,
                                           shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_data,
                                          batch_size=test_batch_size,
                                          shuffle=False)  # the test set does not need shuffling
#torch.cuda.manual_seed(42)
# instantiate the model
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = SimpleNet()
model.to(device)

# define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)

# start training
losses = []
acces = []
eval_losses = []
eval_acces = []
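The snippet stops after declaring the metric lists; a minimal sketch of the epoch loop that would fill them, assuming each batch is an `(img, label)` pair compatible with `nn.CrossEntropyLoss`:

for epoch in range(num_epoches):
    model.train()
    train_loss, train_acc = 0.0, 0.0
    for img, label in train_loader:
        img, label = img.to(device), label.to(device)
        out = model(img)
        loss = criterion(out, label)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
        train_acc += (out.argmax(dim=1) == label).float().mean().item()
    losses.append(train_loss / len(train_loader))
    acces.append(train_acc / len(train_loader))

    # evaluation pass over the held-out set
    model.eval()
    eval_loss, eval_acc = 0.0, 0.0
    with torch.no_grad():
        for img, label in test_loader:
            img, label = img.to(device), label.to(device)
            out = model(img)
            eval_loss += criterion(out, label).item()
            eval_acc += (out.argmax(dim=1) == label).float().mean().item()
    eval_losses.append(eval_loss / len(test_loader))
    eval_acces.append(eval_acc / len(test_loader))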
Example #8
if __name__ == "__main__":
    utils.makedirs(args.save)
    logger = utils.get_logger(logpath=os.path.join(args.save, 'logs'),
                              filepath=os.path.abspath(__file__))
    logger.info(args)

    logger.info("Number of train samples = {}".format(
        len(train_loader) * args.batch_size))
    logger.info("Number of test samples = {}".format(
        len(test_loader) * args.batch_size))

    torch.manual_seed(args.seed)
    # torch.backends.cudnn.deterministic = True
    # torch.backends.cudnn.benchmark = False

    model = SimpleNet(train_features.shape[-1]).to(args.device)
    criterion = torch.nn.CrossEntropyLoss(reduction="sum")
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=args.lr,
                                momentum=args.momentum)

    # Compute all metrics for initializations

    # Test accuracy per class
    init_test_accuracy_per_class = utils.get_accuracy_per_class(
        model, test_loader, args.device, 2)
    test_accuracy_per_class = {
        0: [init_test_accuracy_per_class[0]],
        1: [init_test_accuracy_per_class[1]]
    }
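`utils.get_accuracy_per_class` is project-specific; a sketch of what such a helper plausibly computes, assuming the loader yields `(features, labels)` batches with integer class labels:

def get_accuracy_per_class(model, loader, device, n_classes):
    # per-class accuracy: correct / total for each label value (assumed behaviour)
    correct = torch.zeros(n_classes)
    total = torch.zeros(n_classes)
    model.eval()
    with torch.no_grad():
        for features, labels in loader:
            preds = model(features.to(device)).argmax(dim=1).cpu()
            for c in range(n_classes):
                mask = labels == c
                total[c] += mask.sum()
                correct[c] += (preds[mask] == labels[mask]).sum()
    return {c: (correct[c] / total[c]).item() if total[c] > 0 else float('nan')
            for c in range(n_classes)}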
Example #9
def main(args):
    torch.manual_seed(args.seed)
    data = pd.read_csv("../data/data_final_two.csv")
    label = data["winner"].copy()
    data = data.drop(columns=['winner'])
    data_np = data.to_numpy()
    label_np = label.to_numpy()
    x_train, x_valid, y_train, y_valid = train_test_split(data_np,
                                                          label_np,
                                                          test_size=0.2)
    # Splitting and preparing the dataset and dataloader

    # train_set = pd.read_csv("../data/train.csv", index_col=None)
    # valid_set = pd.read_csv("../data/valid.csv", index_col=None)
    # test_set = pd.read_csv("../data/test.csv", index_col=None)

    # x_train = train_set.to_numpy()[:, 1:]
    # y_train = train_set.to_numpy()[:, 0]
    # x_valid = valid_set.to_numpy()[:, 1:]
    # y_valid = valid_set.to_numpy()[:, 0]
    #
    # x_test = test_set.to_numpy()[:, 1:]
    # y_test = test_set.to_numpy()[:, 0]

    train_data = FightDataset(x_train, y_train)
    valid_data = FightDataset(x_valid, y_valid)
    # test_data = FightDataset(x_test, y_test)

    train_loader = DataLoader(train_data,
                              batch_size=args.batch_size,
                              shuffle=True)
    val_loader = DataLoader(valid_data, batch_size=len(x_valid), shuffle=True)
    # test_loader = DataLoader(test_data, batch_size=len(x_test),
    #                          shuffle=True)

    model = SimpleNet()
    loss_function = torch.nn.BCELoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=args.lr)

    # Initializing the list to hold accuracies and losses
    t_accuracystore = []
    v_accuracystore = []
    t_lossstore = []
    v_lossstore = []
    t = time()
    test_acc = 0
    for i in range(args.epochs):
        t_acc = 0
        model.train()
        for j, d in enumerate(train_loader, 0):

            inputs, label = d
            optimizer.zero_grad()
            predict = model(inputs.float())
            t_loss = loss_function(input=predict.squeeze(),
                                   target=label.float())
            t_loss.backward()
            optimizer.step()

            # Evaluating training accuracy
            for k in range(len(label)):
                if round(predict[k].item()) == label[k]:
                    t_acc += 1

        v_acc = 0
        # Evaluating validation accuracy
        model.eval()
        with torch.no_grad():  # no gradients needed for validation
            for j, d in enumerate(val_loader, 0):
                inputs, label = d
                predict = model(inputs.float())
                v_loss = loss_function(input=predict.squeeze(),
                                       target=label.float())
                for k in range(len(label)):
                    if round(predict[k].item()) == label[k]:
                        v_acc += 1
        t_accuracystore.append(t_acc / len(train_data))
        v_accuracystore.append(v_acc / len(valid_data))
        t_lossstore.append(t_loss.item())  # store the scalar, not the graph-bearing tensor
        v_lossstore.append(v_loss.item())
        print("%5.3f" % (v_acc / len(valid_data)))

    # for j, d in enumerate(test_loader, 0):
    #     inputs, label = d
    #     predict = model(inputs.float())
    #     test_loss = loss_function(input=predict.squeeze(),
    #                               target=label.float())
    #     for k in range(len(label)):
    #         if round(predict[k].item()) == label[k]:
    #             test_acc += 1

    elapsed = time() - t
    print(elapsed)
    # print(test_acc / len(test_data))

    # Plotting accuracies for training and validation
    epoch_store = range(len(t_accuracystore))
    loss_store = range(len(t_lossstore))

    plt.plot(epoch_store, t_accuracystore, label='Train')
    plt.plot(epoch_store, v_accuracystore, label='Validation')
    plt.title("Accuracy over Epochs")
    plt.legend(['Training', 'Validation'])
    plt.xlabel('Epoch #')
    plt.ylabel('Accuracy')
    plt.show()

    plt.plot(loss_store, t_lossstore, label='Train')
    plt.plot(loss_store, v_lossstore, label='Validation')
    plt.title("Loss over Epochs")
    plt.legend(['Training', 'Validation'])
    plt.xlabel('Epoch #')
    plt.ylabel('Loss')
    plt.show()
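The per-sample accuracy loops in this example can be collapsed into one vectorized comparison; a sketch assuming `predict` holds sigmoid outputs of shape `(batch, 1)`:

# equivalent to the k-loop above: round the probabilities and count matches
t_acc += (predict.squeeze().round() == label.float()).sum().item()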
Example #10
conf['train']['batch_size'] = 128
data_loader_train = DataLoader(train_val['train'], batch_size=conf['train']['batch_size'], shuffle=True, num_workers=2)
data_loader_val = DataLoader(train_val['val'], batch_size=500, shuffle=False, num_workers=1)

model = AutoEncoder(in_channels=1, dec_channels=1, latent_size=conf['model']['latent_size'])
model = model.to(device)
# alternative weights: autoencoder_bce_loss_latent12.pt
model.load_state_dict(torch.load('weights/archi_mega_super_long_metric_learn_6.pt'))

#1 - scale (from 0.5 to 1.0), 2,3 - orientation (cos, sin), 4,5 - position (from 0 to 1)
latent_range = [4,5]
min_value = 0
max_value = 1

regressor = SimpleNet(latent_size=conf['model']['latent_size'], number_of_classes=len(latent_range))
regressor.to(device)

loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(regressor.parameters(), lr=0.001)



def regression_validation(regressor, model, data_loader):
    precision_list = []
    for batch_i, batch in enumerate(data_loader):
        # if batch_i == 500:
        #     break
        label = batch['latent'][:, latent_range]  # position targets (latent dims 4, 5)
        label = label.type(torch.float32)
Example #11
def main():

    parser = argparse.ArgumentParser()
    parser.add_argument('--arch',
                        required=True,
                        help='architecture (model_dir)')
    parser.add_argument('--data_dir', default='../../data/', type=str)
    parser.add_argument('--do_train', action='store_true')
    parser.add_argument('--do_predict', action='store_true')
    parser.add_argument('--hidden_size', default=512, type=int)
    parser.add_argument('--batch_size', default=32, type=int)
    parser.add_argument('--max_epoch', default=800, type=int)
    parser.add_argument('--lr', default=1e-3, type=float)
    parser.add_argument('--cuda', default=1, type=int)
    parser.add_argument('--ckpt',
                        default=-1,
                        type=int,
                        help='load pre-trained model epoch')
    args = parser.parse_args()

    if args.do_train:

        dataset = pd.read_csv(args.data_dir + "train.csv")
        dataset.drop("Id", axis=1, inplace=True)

        train_set, valid_set = train_test_split(dataset,
                                                test_size=0.2,
                                                random_state=42)
        train = preprocess_samples(train_set, missing=["F2", "F7", "F12"])
        valid = preprocess_samples(valid_set, missing=["F2", "F7", "F12"])
        trainData = FeatureDataset(train)
        validData = FeatureDataset(valid)

        device = torch.device(
            'cuda:%d' % args.cuda if torch.cuda.is_available() else 'cpu')
        trainer = Trainer(device, trainData, validData, args.hidden_size,
                          args.lr, args.batch_size, args.arch)

        for epoch in range(1, args.max_epoch + 1):
            print('Epoch: {}'.format(epoch))
            trainer.run_epoch(epoch, True)
            trainer.run_epoch(epoch, False)
            if epoch % 50 == 0:
                trainer.save(epoch)

    if args.do_predict:

        dataset = pd.read_csv(args.data_dir + "test.csv")
        dataset.drop("Id", axis=1, inplace=True)
        test = preprocess_samples(dataset, missing=["F2", "F7", "F12"])
        testData = FeatureDataset(test)

        path = '%s/model.pkl.%d' % (args.arch, args.ckpt)
        checkpoint = torch.load(path)
        device = torch.device(
            'cuda:%d' % args.cuda if torch.cuda.is_available() else 'cpu')

        model = SimpleNet(args.hidden_size)
        model.load_state_dict(checkpoint['model'])
        model.to(device)
        model.train(False)
        generator = Generator(args.hidden_size)
        generator.load_state_dict(checkpoint['generator'])
        generator.to(device)
        generator.train(False)

        dataloader = DataLoader(dataset=testData,
                                batch_size=args.batch_size,
                                shuffle=False,
                                collate_fn=testData.collate_fn,
                                num_workers=4)
        trange = tqdm(enumerate(dataloader),
                      total=len(dataloader),
                      desc='Predict')
        prediction = []
        with torch.no_grad():  # inference only
            for i, (features, missing, y) in trange:
                features = features.to(device)
                gen_missing = generator(features)
                all_features = torch.cat((features, gen_missing), dim=1)
                o_labels = model(all_features)
                o_labels = torch.sigmoid(o_labels) > 0.5  # F.sigmoid is deprecated
                prediction.append(o_labels.to('cpu'))

        prediction = torch.cat(prediction).numpy().astype(int)
        SubmitGenerator(prediction, args.data_dir + 'sampleSubmission.csv')
Example #12

data_loader_train = DataLoader(train_val['train'],
                               batch_size=conf['train']['batch_size'],
                               shuffle=True,
                               num_workers=2)
data_loader_val = DataLoader(train_val['val'],
                             batch_size=500,
                             shuffle=False,
                             num_workers=1)
load_path = 'weights/grayscale/archi_mega_super_long_metric_learn_6.pt'  # alternative: my_algorithm_2triplet_5.pt

model = AutoEncoder(in_channels=1,
                    dec_channels=1,
                    latent_size=conf['model']['latent_size'])
model = model.to(device)
model.load_state_dict(torch.load(load_path))

classifier = SimpleNet(conf['model']['latent_size'])
# alternative: ComplexNet(in_channels=1, dec_channels=1, latent_size=conf['model']['latent_size'])
classifier.to(device)

loss_function = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(classifier.parameters(), lr=0.001)


def classification_validation(classifier, model, data_loader):
    precision_list = []
    for batch_i, batch in enumerate(data_loader):
        # if batch_i == 500:
        #     break
        label = batch['latent'][:, 0]  # 0 - figure type
        label = label.type(torch.LongTensor) - 1
Example #13
def main():
    # Set up logging
    log = logging.getLogger()
    log.setLevel(logging.DEBUG)
    formatter = logging.Formatter(
        '%(asctime)s - %(levelname)s(%(name)s): %(message)s')
    consH = logging.StreamHandler()
    consH.setFormatter(formatter)
    consH.setLevel(logging.DEBUG)
    log.addHandler(consH)

    # Parse command lines
    parser = argparse.ArgumentParser()
    parser.add_argument('--data-path',
                        type=str,
                        help='Path to csv with data file.')
    parser.add_argument('--lr',
                        type=float,
                        default=0.01,
                        help='Learning rate (default: 0.01).')
    parser.add_argument('--epochs',
                        type=int,
                        default=10,
                        help='Number of epochs (default: 10).')
    parser.add_argument('--batch-size',
                        type=int,
                        default=64,
                        help='Batch size (default: 64).')
    parser.add_argument('--train-val-split',
                        type=float,
                        default=0.8,
                        help='Training vs. validation split (default: 0.8).')
    parser.add_argument('--seed',
                        type=int,
                        default=0,
                        help='Random seed (default: 0).')
    opts = parser.parse_args()
    data_path = opts.data_path
    lr = opts.lr
    epochs = opts.epochs
    batch_size = opts.batch_size
    seed = opts.seed
    split_percent = opts.train_val_split
    log.info(
        f'Running fraud_net with the following parameters:\n- Learning Rate: {lr}\n- Epochs: {epochs}'
        + f'\n- Batch size: {batch_size}\n- Seed: {seed}' +
        f'\n- Training-validation split: {100*split_percent:.0f}-{100*(1-split_percent):.0f}'
    )

    # Load data
    train_data = FraudDataset(csv_file=data_path,
                              split='train',
                              split_percent=split_percent,
                              seed=seed)
    val_data = FraudDataset(csv_file=data_path,
                            split='val',
                            split_percent=split_percent,
                            seed=seed)

    # Load model and optimizer
    # TODO: Create model script, import it, instantiate model here
    model = SimpleNet(in_dim=10, hidden_dim=100,
                      out_dim=1)  # this is just an example
    optimizer = None
    # TODO: Instantiate optimizer

    # Run training-validation loops:
    run_train_eval(model, optimizer, train_data, val_data, lr, epochs,
                   batch_size)
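The optimizer is deliberately left as a TODO above; a minimal sketch of the instantiation, assuming plain SGD (the intended choice is not specified by the source) and that `torch` is imported in the script:

# hypothetical replacement for `optimizer = None`
optimizer = torch.optim.SGD(model.parameters(), lr=lr)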