Example #1
class bgrl(Method):
    def __init__(self, input_shape, vol):
        super(bgrl, self).__init__(500)
        self.input_shape = input_shape
        self.vol = vol
        self.gamma = 1.0  # control the effect of softmax
        self.losses = np.zeros((self.max_iter, ))
        self.vals = np.zeros((self.max_iter, ))

        self.device = torch.device("cuda")
        self.mu = MLP(input_shape, hidden_dim=64,
                      num_outputs=1).to(device=self.device)
        self.nu = MLP(input_shape, hidden_dim=64,
                      num_outputs=1).to(device=self.device)
        self.tf_optim = Adam(list(self.mu.parameters()) +
                             list(self.nu.parameters()),
                             lr=0.002)

    def update_parameters(self, As, Bs, shuffle=True):
        if shuffle:
            np.random.shuffle(As)
            np.random.shuffle(Bs)
        As = torch.FloatTensor(As).to(self.device)
        Bs = torch.FloatTensor(Bs).to(self.device)
        VAs = self.mu(As)
        VBs = self.nu(Bs)

        cost = torch.norm(As - Bs, p=2, dim=-1)
        damping = VAs.squeeze() - VBs.squeeze() - cost
        damping = self.gamma * torch.exp(damping / self.gamma)
        loss = -VAs.mean() + VBs.mean() + damping.mean()

        self.tf_optim.zero_grad()
        loss.backward()
        self.tf_optim.step()

        return loss.item()

    def estimate(self, As, Bs):
        As = torch.FloatTensor(As).to(self.device)
        Bs = torch.FloatTensor(Bs).to(self.device)
        VAs = self.mu(As)
        VBs = self.nu(Bs)
        rv = torch.abs(VAs.mean() - VBs.mean())
        return rv.squeeze().detach().cpu().numpy()

    def train(self, As, Bs):
        for i in range(self.max_iter):
            loss = self.update_parameters(As, Bs)
            self.losses[i] = loss
            self.vals[i] = self.estimate(As, Bs)
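
The example above relies on an MLP regressor and a Method base class defined elsewhere in its project. A minimal sketch of stand-ins that would make it runnable, assuming Method only has to store max_iter and MLP is a small two-layer network (these definitions are illustrative, not the project's own):

import numpy as np  # the bgrl class above also assumes numpy, torch and Adam are imported
import torch
import torch.nn as nn
from torch.optim import Adam


class Method:
    # Hypothetical base class: the snippets on this page only use it to hold max_iter.
    def __init__(self, max_iter):
        self.max_iter = max_iter


class MLP(nn.Module):
    # Hypothetical regressor matching the MLP(input_shape, hidden_dim, num_outputs) calls above.
    def __init__(self, input_shape, hidden_dim=64, num_outputs=1):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_shape, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, num_outputs),
        )

    def forward(self, x):
        return self.net(x)
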
Example #2
def mpl(root, path_train, path_test):
    data_set_train = dataset_MLP(root + path_train, train=True)
    data_set_test = dataset_MLP(root + path_test, train=False)

    trainloader = DataLoader(data_set_train, batch_size=1000, shuffle=True)
    testloader = DataLoader(data_set_test, batch_size=1000)

    model = MLP()

    criterion = t.nn.CrossEntropyLoss()
    lr = 0.01
    optimizer = t.optim.SGD(model.parameters(), lr, momentum=0.4)

    for epoch in range(240):
        for _, (data, label) in enumerate(trainloader):
            model.train()
            optimizer.zero_grad()
            score = model(data)
            loss = criterion(score, label)
            loss.backward()
            optimizer.step()
        print("Epoch:%d loss:%f" % (epoch, loss.mean()))

    res = []
    for _, (data) in enumerate(testloader):
        model.eval()
        predict = model(data)
        predict = predict.detach().numpy().tolist()
        res += predict
    res = np.array(res)

    ans = np.argmax(res, axis=1)
    data_set_test.save_res(ans, "./images/res_MLP.csv")
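
dataset_MLP and the zero-argument MLP() here are project-specific and not shown. A plausible stand-in for the classifier, assuming flattened 28x28 inputs and 10 classes (purely an assumption for illustration):

import torch as t


class MLP(t.nn.Module):
    # Assumed architecture: 784 -> 256 -> 10, matching the no-argument constructor used above.
    def __init__(self):
        super().__init__()
        self.fc1 = t.nn.Linear(784, 256)
        self.fc2 = t.nn.Linear(256, 10)

    def forward(self, x):
        x = x.view(x.size(0), -1)  # flatten each image into a vector
        x = t.relu(self.fc1(x))
        return self.fc2(x)
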
Example #3
def main(dataset, dim, layers, lr, reg, epochs, batchsize):
    n_user = overlap_user(dataset)
    print(n_user)
    logging.info(str(n_user))
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    mf_s, mf_t = load_model(dataset, dim)
    mapping = MLP(dim, layers)
    mf_s = mf_s.to(device)
    mf_t = mf_t.to(device)
    mapping = mapping.to(device)
    opt = torch.optim.Adam(mapping.parameters(), lr=lr, weight_decay=reg)
    mse_loss = nn.MSELoss()

    start = time()
    for epoch in range(epochs):
        loss_sum = 0
        for users in batch_user(n_user, batchsize):
            us = torch.tensor(users).long()
            us = us.to(device)
            u = mf_s.get_embed(us)
            y = mf_t.get_embed(us)
            loss = train(mapping, opt, mse_loss, u, y)
            loss_sum += loss
        print('Epoch %d [%.1f] loss = %f' % (epoch, time()-start, loss_sum))
        logging.info('Epoch %d [%.1f] loss = %f' %
                     (epoch, time()-start, loss_sum))
        start = time()

    mfile = 'pretrain/%s/Mapping.pth.tar' % dataset
    torch.save(mapping.state_dict(), mfile)
    print('save [%.1f]' % (time()-start))
    logging.info('save [%.1f]' % (time()-start))
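
The train(mapping, opt, mse_loss, u, y) helper is not shown on this page; a minimal one-step version consistent with how it is called (it must return a number that can be summed into loss_sum) might look like:

def train(mapping, opt, mse_loss, u, y):
    # One optimization step: pull the mapped source embeddings u toward the target embeddings y.
    opt.zero_grad()
    loss = mse_loss(mapping(u), y)
    loss.backward()
    opt.step()
    return loss.item()
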
Example #4
def train_and_send(global_model_weights, current_epoch, IDS_df):
    device = 'cpu'
    if torch.cuda.is_available():
        device = 'cuda'

    # Defining the DNN model
    input_size = model_input_size
    model = MLP(input_size)
    model.load_state_dict(torch.load(global_model_weights))
    model.to(device)

    # Cross Entropy Loss
    error = nn.CrossEntropyLoss().to(device)

    # Adam Optimizer
    learning_rate = 0.001
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=learning_rate,
                                 weight_decay=0.01)

    model, loss = train_model_stratified(model, optimizer, error, device,
                                         current_epoch, IDS_df)

    # Encode model weights and send
    model.to('cpu')
    model_str = encode_weights(model)
    remote_mqttclient.publish(TRAINED_MODEL_TOPIC,
                              payload=model_str,
                              qos=2,
                              retain=False)
    remote_mqttclient.publish(TRAINED_LOSS_TOPIC,
                              payload=str(loss),
                              qos=2,
                              retain=False)
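
encode_weights is assumed to turn the model into a string payload for MQTT; one hypothetical sketch (not the project's actual helper) using torch.save plus base64:

import base64
import io

import torch


def encode_weights(model):
    # Serialize the state_dict to bytes, then base64-encode it so it can travel as a text payload.
    buffer = io.BytesIO()
    torch.save(model.state_dict(), buffer)
    return base64.b64encode(buffer.getvalue()).decode("ascii")
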
Example #5
class wgan(Method):
    def __init__(self, input_shape, vol):
        super(wgan, self).__init__(2000)
        self.input_shape = input_shape
        self.vol = vol
        self.clamp_max = 0.01
        self.losses = np.zeros((self.max_iter, ))
        self.vals = np.zeros((self.max_iter, ))

        self.device = torch.device("cuda")
        self.disc = MLP(input_shape, hidden_dim=64,
                        num_outputs=1).to(device=self.device)
        self.disc_optim = Adam(self.disc.parameters(), lr=0.002)

    def update_parameters(self, As, Bs, shuffle=True):
        if shuffle:
            np.random.shuffle(As)
            np.random.shuffle(Bs)
        As = torch.FloatTensor(As).to(self.device)
        Bs = torch.FloatTensor(Bs).to(self.device)
        VAs = self.disc(As)
        VBs = self.disc(Bs)

        loss1 = VAs.mean()
        loss2 = -VBs.mean()
        self.disc_optim.zero_grad()
        loss1.backward()
        loss2.backward()
        self.disc_optim.step()
        for p in self.disc.parameters():
            p.data.clamp_(-self.clamp_max, self.clamp_max)

        return (loss1 + loss2).item()

    def estimate(self, As, Bs):
        As = torch.FloatTensor(As).to(self.device)
        Bs = torch.FloatTensor(Bs).to(self.device)
        VAs = self.disc(As)
        VBs = self.disc(Bs)
        rv = torch.abs(VAs.mean() - VBs.mean())
        return rv.squeeze().detach().cpu().numpy()

    def train(self, As, Bs):
        for i in range(self.max_iter):
            loss = self.update_parameters(As, Bs)
            self.losses[i] = loss
            self.vals[i] = self.estimate(As, Bs)
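
A short usage sketch for the wgan estimator above, assuming the Method/MLP stand-ins sketched after Example #1 and a CUDA device (the class hard-codes torch.device("cuda")):

import numpy as np

# Hypothetical driver: estimate the distance between two toy Gaussian samples.
As = np.random.randn(512, 2).astype(np.float32)
Bs = (np.random.randn(512, 2) + 1.0).astype(np.float32)

method = wgan(input_shape=2, vol=1.0)
method.train(As, Bs)
print("final estimate:", method.vals[-1])
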
Example #6
def main(args):
    # Create model directory
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)

    # # Build data loader
    # dataset,targets= load_dataset()
    # np.save("__cache_dataset.npy", dataset)
    # np.save("__cache_targets.npy", targets)
    # return

    dataset = np.load("__cache_dataset.npy")
    targets = np.load("__cache_targets.npy")

    # Build the models
    mlp = MLP(args.input_size, args.output_size)

    mlp.load_state_dict(
        torch.load(
            '_backup_model_statedict/mlp_100_4000_PReLU_ae_dd_final.pkl'))

    if torch.cuda.is_available():
        mlp.cuda()

    # Loss and Optimizer
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adagrad(mlp.parameters())

    # Train the Models
    total_loss = []
    print(len(dataset))
    print(len(targets))
    sm = 100  # start saving models after 100 epochs
    for epoch in range(args.num_epochs):
        print("epoch" + str(epoch))
        avg_loss = 0
        for i in range(0, len(dataset), args.batch_size):
            # Forward, Backward and Optimize
            mlp.zero_grad()
            bi, bt = get_input(i, dataset, targets, args.batch_size)
            bi = to_var(bi)
            bt = to_var(bt)
            bo = mlp(bi)
            loss = criterion(bo, bt)
            avg_loss = avg_loss + loss.item()
            loss.backward()
            optimizer.step()
        print("--average loss:")
        print(avg_loss / (len(dataset) / args.batch_size))
        total_loss.append(avg_loss / (len(dataset) / args.batch_size))
        # Save the models
        if epoch == sm:
            model_path = 'mlp_100_4000_PReLU_ae_dd' + str(sm) + '.pkl'
            torch.save(mlp.state_dict(),
                       os.path.join(args.model_path, model_path))
            sm = sm + 50  # save the model every 50 epochs from epoch 100 onwards
    torch.save(total_loss, 'total_loss.dat')
    model_path = 'mlp_100_4000_PReLU_ae_dd_final.pkl'
    torch.save(mlp.state_dict(), os.path.join(args.model_path, model_path))
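
to_var and get_input are project helpers that are not reproduced here; minimal versions consistent with how the loop uses them (mini-batch slicing plus tensor conversion) might be:

import numpy as np
import torch


def to_var(x):
    # Move the tensor to the GPU when one is available.
    return x.cuda() if torch.cuda.is_available() else x


def get_input(i, dataset, targets, batch_size):
    # Slice one mini-batch out of the cached numpy arrays and convert it to float tensors.
    bi = torch.from_numpy(np.asarray(dataset[i:i + batch_size])).float()
    bt = torch.from_numpy(np.asarray(targets[i:i + batch_size])).float()
    return bi, bt
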
Example #7
def gpu_thread(load, memory_queue, process_queue, common_dict, worker):
    # the only thread that has access to the GPU; it performs all the NN computation
    import psutil
    p = psutil.Process()
    p.cpu_affinity([worker])
    import signal
    signal.signal(signal.SIGINT, signal.SIG_IGN)
    try:
        print('process started with pid: {} on core {}'.format(
            os.getpid(), worker),
              flush=True)
        model = MLP(parameters.OBS_SPACE, parameters.ACTION_SPACE)
        model.to(parameters.DEVICE)
        # optimizer = optim.Adam(model.parameters(), lr=5e-5)
        # optimizer = optim.SGD(model.parameters(), lr=3e-2)
        optimizer = optim.RMSprop(model.parameters(), lr=1e-4)
        epochs = 0
        if load:
            checkpoint = torch.load('./model/walker.pt')
            model.load_state_dict(checkpoint['model_state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
            epochs = checkpoint['epochs']
        observations = torch.Tensor([]).to(parameters.DEVICE)
        rewards = torch.Tensor([]).to(parameters.DEVICE)
        actions = torch.Tensor([]).to(parameters.DEVICE)
        probs = torch.Tensor([]).to(parameters.DEVICE)
        common_dict['epoch'] = epochs
        while True:
            memory_full, observations, rewards, actions, probs = \
                destack_memory(memory_queue, observations, rewards, actions, probs)
            destack_process(model, process_queue, common_dict)
            if len(observations) > parameters.MAXLEN or memory_full:
                epochs += 1
                print('-' * 60 + '\n        epoch ' + str(epochs) + '\n' +
                      '-' * 60)
                run_epoch(epochs, model, optimizer, observations, rewards,
                          actions, probs)
                observations = torch.Tensor([]).to(parameters.DEVICE)
                rewards = torch.Tensor([]).to(parameters.DEVICE)
                actions = torch.Tensor([]).to(parameters.DEVICE)
                probs = torch.Tensor([]).to(parameters.DEVICE)
                torch.save(
                    {
                        'model_state_dict': model.state_dict(),
                        'optimizer_state_dict': optimizer.state_dict(),
                        'epochs': epochs
                    }, './model/walker.pt')
                common_dict['epoch'] = epochs
    except Exception as e:
        print(e)
        print('saving before interruption', flush=True)
        torch.save(
            {
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'epochs': epochs
            }, './model/walker.pt')
Example #8
File: main.py Project: hsack6/AGATE
def main(opt):
    train_dataset = BADataset(opt.dataroot, opt.L, True, False, False)
    train_dataloader = BADataloader(train_dataset, batch_size=opt.batchSize, \
                                      shuffle=True, num_workers=opt.workers, drop_last=True)

    valid_dataset = BADataset(opt.dataroot, opt.L, False, True, False)
    valid_dataloader = BADataloader(valid_dataset, batch_size=opt.batchSize, \
                                     shuffle=True, num_workers=opt.workers, drop_last=True)

    test_dataset = BADataset(opt.dataroot, opt.L, False, False, True)
    test_dataloader = BADataloader(test_dataset, batch_size=opt.batchSize, \
                                     shuffle=True, num_workers=opt.workers, drop_last=True)

    all_dataset = BADataset(opt.dataroot, opt.L, False, False, False)
    all_dataloader = BADataloader(all_dataset, batch_size=opt.batchSize, \
                                     shuffle=False, num_workers=opt.workers, drop_last=False)

    opt.n_edge_types = train_dataset.n_edge_types
    opt.n_node = train_dataset.n_node

    net = MLP(opt)
    net.double()
    print(net)

    criterion = nn.BCELoss()

    if opt.cuda:
        net.cuda()
        criterion.cuda()

    optimizer = optim.Adam(net.parameters(), lr=opt.lr)
    early_stopping = EarlyStopping(patience=opt.patience, verbose=True)

    os.makedirs(OutputDir, exist_ok=True)
    train_loss_ls = []
    valid_loss_ls = []
    test_loss_ls = []

    for epoch in range(0, opt.niter):
        train_loss = train(epoch, train_dataloader, net, criterion, optimizer, opt)
        valid_loss = valid(valid_dataloader, net, criterion, opt)
        test_loss = test(test_dataloader, net, criterion, opt)

        train_loss_ls.append(train_loss)
        valid_loss_ls.append(valid_loss)
        test_loss_ls.append(test_loss)

        early_stopping(valid_loss, net, OutputDir)
        if early_stopping.early_stop:
            print("Early stopping")
            break

    df = pd.DataFrame({'epoch':[i for i in range(1, len(train_loss_ls)+1)], 'train_loss': train_loss_ls, 'valid_loss': valid_loss_ls, 'test_loss': test_loss_ls})
    df.to_csv(OutputDir + '/loss.csv', index=False)

    net.load_state_dict(torch.load(OutputDir + '/checkpoint.pt'))
    inference(all_dataloader, net, criterion, opt, OutputDir)
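
EarlyStopping here follows the usual "checkpoint on every validation improvement, stop after patience epochs without one" pattern, and the file name matches the checkpoint.pt loaded at the end. A minimal sketch of such a class (an assumption, not the project's exact implementation):

import os

import torch


class EarlyStopping:
    def __init__(self, patience=7, verbose=False):
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_loss = None
        self.early_stop = False

    def __call__(self, val_loss, model, output_dir):
        if self.best_loss is None or val_loss < self.best_loss:
            # Improvement: reset the counter and checkpoint the model.
            self.best_loss = val_loss
            self.counter = 0
            torch.save(model.state_dict(), os.path.join(output_dir, 'checkpoint.pt'))
            if self.verbose:
                print(f'Validation loss improved to {val_loss:.6f}; checkpoint saved.')
        else:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True
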
Example #9
def train(FLAGS):
    """
    Train our embeddings.
    """

    # Get data loaders
    print("==> Reading and processing the data ... ", end="")
    train_loader, test_loader, num_unique_words = process_data(
        data_dir=FLAGS.data_dir,
        data_file=FLAGS.data_file,
        vocab_size=FLAGS.vocab_size,
        window_size=FLAGS.window_size,
        split_ratio=FLAGS.split_ratio,
        batch_size=FLAGS.batch_size,
    )
    print("[COMPLETE]")

    # Initialize model, criterion, loss
    print("==> Initializing model components ... ", end="")
    model = MLP(
        D_in=num_unique_words,
        embedding_dim=FLAGS.embedding_dim,
        num_hidden_units=FLAGS.num_hidden_units,
        window_size=FLAGS.window_size,
    )
    # Objective
    criterion = torch.nn.CrossEntropyLoss()
    # Optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=FLAGS.lr)
    print("[COMPLETE]")

    # Train the model
    print("==> Training the model ... [IN PROGRESS]")
    model = training_procedure(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        train_loader=train_loader,
        test_loader=test_loader,
        num_epochs=FLAGS.num_epochs,
        learning_rate=FLAGS.lr,
        decay_rate=FLAGS.decay_rate,
        max_grad_norm=FLAGS.max_grad_norm,
    )
    print("\n[COMPLETE]")

    # Save the model
    print("==> Saving the model ... [IN PROGRESS]")
    torch.save(model, os.path.join(basedir, FLAGS.data_dir, "model.pt"))
    print("\n[COMPLETE]")
Example #10
def train(FLAGS):
    """
    Train our embeddings.
    """

    # Get data loaders
    print ("==> Reading and processing the data ... ", end="")
    train_loader, test_loader, num_unique_words = process_data(
        data_dir=FLAGS.data_dir,
        data_file=FLAGS.data_file,
        vocab_size=FLAGS.vocab_size,
        window_size=FLAGS.window_size,
        split_ratio=FLAGS.split_ratio,
        batch_size=FLAGS.batch_size,
        )
    print ("[COMPLETE]")

    # Initialize model, criterion, loss
    print ("==> Initializing model components ... ", end="")
    model = MLP(
        D_in=num_unique_words,
        embedding_dim=FLAGS.embedding_dim,
        num_hidden_units=FLAGS.num_hidden_units,
        window_size=FLAGS.window_size,
        )
    # Objective
    criterion = torch.nn.CrossEntropyLoss()
    # Optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=FLAGS.lr)
    print ("[COMPLETE]")

    # Train the model
    print ("==> Training the model ... [IN PROGRESS]")
    model = training_procedure(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        train_loader=train_loader,
        test_loader=test_loader,
        num_epochs=FLAGS.num_epochs,
        learning_rate=FLAGS.lr,
        decay_rate=FLAGS.decay_rate,
        max_grad_norm=FLAGS.max_grad_norm,
        )
    print ("\n[COMPLETE]")

    # Save the model
    print ("==> Saving the model ... [IN PROGRESS]")
    torch.save(model, os.path.join(basedir, FLAGS.data_dir, "model.pt"))
    print ("\n[COMPLETE]")
Example #11
def main():

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)

    data = locate('get_{}'.format(args.dataset))(args)
    train_data, val_data, test_data = data

    if args.dataset == 'mnist':
        model = MLP(args)
    elif args.dataset in ('cifar10', 'cifar100'):
        model = Resnet18(args)
    else:
        raise Exception('error')
    weight_arch = data_selection(data[0])
    architect = Architect(model, weight_arch, args)

    train_loader  = DataLoader(train_data, batch_size = args.batch_size, shuffle = True, drop_last = True)
    val_loader = DataLoader(val_data, batch_size = 64, shuffle = True, drop_last = False)
    test_loader = DataLoader(test_data, batch_size = 64, shuffle = True, drop_last = False)

    optimizer = torch.optim.SGD(
        model.parameters(),
        args.learning_rate,
        momentum=args.momentum,
        weight_decay=args.weight_decay)

    print(optimizer.state)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.n_epochs), eta_min=args.learning_rate_min)

    for epoch in range(args.n_epochs):
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        train_acc, train_obj = Train(train_loader, val_data, model, args, architect, weight_arch, optimizer)
        logging.info('train_acc %f', train_acc)

        # validation
        valid_acc, valid_obj = infer(val_loader, model)
        logging.info('valid_acc %f', valid_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))
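
Calling scheduler.step() before any optimizer.step(), as the loop above does, triggers a warning on recent PyTorch versions, and get_lr() has been superseded by get_last_lr(). A self-contained toy sketch of the recommended per-epoch ordering (not this project's code):

import torch

model = torch.nn.Linear(4, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10, eta_min=1e-4)

for epoch in range(10):
    lr = scheduler.get_last_lr()[0]  # read the current LR without stepping the schedule
    # ... run the training epoch here, calling optimizer.step() once per batch ...
    optimizer.step()                 # placeholder for the real per-batch updates
    scheduler.step()                 # decay once per epoch, after the optimizer steps
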
Example #12
def main():
    parser = ArgumentParser(description='train a MLP model')
    parser.add_argument('INPUT', type=str, help='path to input')
    parser.add_argument('EMBED', type=str, help='path to embedding')
    parser.add_argument('--gpu', '-g', default=-1, type=int, help='gpu number')
    args = parser.parse_args()

    word_to_id = word2id(args.INPUT)
    embedding = id2embedding(args.EMBED, word_to_id)

    train_loader = MyDataLoader(args.INPUT,
                                word_to_id,
                                batch_size=5000,
                                shuffle=True,
                                num_workers=1)
    # Create the model instance
    net = MLP(word_to_id, embedding)
    optimizer = torch.optim.Adam(net.parameters(), lr=1e-3)

    gpu_id = args.gpu
    device = torch.device("cuda:{}".format(gpu_id) if gpu_id >= 0 else "cpu")
    net = net.to(device)

    epochs = 5
    log_interval = 10
    for epoch in range(1, epochs + 1):
        net.train()  # switch to training mode (this matters when using Dropout etc.)
        for batch_idx, (ids, mask, labels) in enumerate(train_loader):
            # data shape: (batchsize, 1, 28, 28)

            ids, mask, labels = ids.to(device), mask.to(device), labels.to(
                device)
            optimizer.zero_grad()  # zero the gradients first; otherwise gradients from previous steps keep accumulating
            output = net(ids, mask)
            output2 = F.softmax(output, dim=1)
            loss = F.binary_cross_entropy(output2[:, 1],
                                          labels.float())  # compute the loss
            loss.backward()
            optimizer.step()  # update the parameters

            # print progress at intervals
            if batch_idx % log_interval == 0:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch, batch_idx * len(ids), len(train_loader.dataset),
                    100. * batch_idx / len(train_loader), loss.item()))
Example #13
def train_model(config, gpu_id, save_dir, exp_name):
    # Instantiating the model
    model_type = config.get('model_type', 'MLP')
    if model_type == "MLP":
        model = MLP(784, config["hidden_layers"], 10, config["nonlinearity"], config["initialization"], config["dropout"], verbose=True)
    elif model_type == "CNN":
        model = CNN(config["initialization"], config["is_batch_norm"], verbose=True)
    else:
        raise ValueError('config["model_type"] not supported : {}'.format(model_type))

    # Loading the MNIST dataset
    x_train, y_train, x_valid, y_valid, x_test, y_test = utils.load_mnist(config["data_file"], data_format=config["data_format"])

    if config['data_reduction'] != 1.:
        x_train, y_train = utils.reduce_trainset_size(x_train, y_train, config['data_reduction'])

    # If GPU is available, sends model and dataset on the GPU
    if torch.cuda.is_available():
        model.cuda(gpu_id)

        x_train = torch.from_numpy(x_train).cuda(gpu_id)
        y_train = torch.from_numpy(y_train).cuda(gpu_id)

        x_valid = Variable(torch.from_numpy(x_valid), volatile=True).cuda(gpu_id)
        y_valid = Variable(torch.from_numpy(y_valid), volatile=True).cuda(gpu_id)

        x_test = Variable(torch.from_numpy(x_test), volatile=True).cuda(gpu_id)
        y_test = Variable(torch.from_numpy(y_test), volatile=True).cuda(gpu_id)
        print("Running on GPU")
    else:
        x_train = torch.from_numpy(x_train)
        y_train = torch.from_numpy(y_train)

        x_valid = Variable(torch.from_numpy(x_valid))
        y_valid = Variable(torch.from_numpy(y_valid))

        x_test = Variable(torch.from_numpy(x_test))
        y_test = Variable(torch.from_numpy(y_test))
        print("WATCH-OUT : torch.cuda.is_available() returned False. Running on CPU.")

    # Instantiate TensorDataset and DataLoader objects
    train_set = torch.utils.data.TensorDataset(x_train, y_train)
    loader = torch.utils.data.DataLoader(train_set, batch_size=config["mb_size"], shuffle=True)

    # Optimizer and Loss Function
    optimizer = optim.SGD(model.parameters(), lr=config['lr'],
                                              momentum=config['momentum'],
                                              weight_decay=config['L2_hyperparam'] * (config['mb_size'] / x_train.size()[0]))
    loss_fn = nn.NLLLoss()

    # Records the model's performance
    train_tape = [[],[]]
    valid_tape = [[],[]]
    test_tape = [[],[]]
    weights_tape = []

    def evaluate(data, labels):

        model.eval()
        if not isinstance(data, Variable):
            if torch.cuda.is_available():
                data = Variable(data, volatile=True).cuda(gpu_id)
                labels = Variable(labels, volatile=True).cuda(gpu_id)
            else:
                data = Variable(data)
                labels = Variable(labels)

        output = model(data)
        loss = loss_fn(output, labels)
        prediction = torch.max(output.data, 1)[1]
        accuracy = (prediction.eq(labels.data).sum() / labels.size(0)) * 100

        return loss.data[0], accuracy

    if not os.path.exists(os.path.join(save_dir, exp_name)):
        os.makedirs(os.path.join(save_dir, exp_name))

    # Record train accuracy
    train_loss, train_acc = evaluate(x_train, y_train)
    train_tape[0].append(train_loss)
    train_tape[1].append(train_acc)

    # Record valid accuracy
    valid_loss, valid_acc = evaluate(x_valid, y_valid)
    valid_tape[0].append(valid_loss)
    valid_tape[1].append(valid_acc)

    # Record test accuracy
    test_loss, test_acc = evaluate(x_test, y_test)
    test_tape[0].append(test_loss)
    test_tape[1].append(test_acc)

    # Record weights L2 norm
    weights_L2_norm = model.get_weights_L2_norm()
    weights_tape.append(float(weights_L2_norm.data.cpu().numpy()))

    print("BEFORE TRAINING \nLoss : {0:.3f} \nAcc : {1:.3f}".format(valid_loss, valid_acc))

    # TRAINING LOOP
    best_valid_acc = 0
    for epoch in range(1, config["max_epochs"]):
        start = time.time()
        model.train()
        for i,(x_batch, y_batch) in enumerate(loader):

            #pdb.set_trace()

            if torch.cuda.is_available():
                x_batch = Variable(x_batch).cuda(gpu_id)
                y_batch = Variable(y_batch).cuda(gpu_id)
            else:
                x_batch = Variable(x_batch)
                y_batch = Variable(y_batch)

            # Empties the gradients
            optimizer.zero_grad()

            # Feedforward through the model
            output = model(x_batch)

            # Computes the loss
            loss = loss_fn(output, y_batch)

            # Backpropagates to compute the gradients
            loss.backward()

            # Takes one training step
            optimizer.step()

            # Record weights L2 norm
            weights_L2_norm = model.get_weights_L2_norm()
            weights_tape.append(float(weights_L2_norm.data.cpu().numpy()))

        # Record train accuracy
        train_loss, train_acc = evaluate(x_train, y_train)
        train_tape[0].append(train_loss)
        train_tape[1].append(train_acc)

        # Record valid accuracy
        valid_loss, valid_acc = evaluate(x_valid, y_valid)
        valid_tape[0].append(valid_loss)
        valid_tape[1].append(valid_acc)

        # Record test accuracy
        test_loss, test_acc = evaluate(x_test, y_test)
        test_tape[0].append(test_loss)
        test_tape[1].append(test_acc)

        print("Epoch {0} \nLoss : {1:.3f} \nAcc : {2:.3f}".format(epoch, valid_loss, valid_acc))
        print("Time : {0:.2f}".format(time.time() - start))

        # Saves the model
        if valid_acc > best_valid_acc:
            print("NEW BEST MODEL")
            torch.save(model.state_dict(), os.path.join(save_dir, exp_name, "model"))
            best_valid_acc = valid_acc

    # Saves the graphs
    utils.save_results(train_tape, valid_tape, test_tape, weights_tape, save_dir, exp_name, config)
    utils.update_comparative_chart(save_dir, config['show_test'])

    return
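
Variable and volatile=True are pre-0.4 PyTorch; on current versions the same no-gradient evaluation is written with torch.no_grad(). A minimal equivalent of the evaluate closure above, assuming plain tensors:

import torch


def evaluate_no_grad(model, loss_fn, data, labels):
    # Modern replacement for Variable(..., volatile=True): disable autograd during evaluation.
    model.eval()
    with torch.no_grad():
        output = model(data)
        loss = loss_fn(output, labels)
        prediction = output.argmax(dim=1)
        accuracy = prediction.eq(labels).float().mean().item() * 100
    return loss.item(), accuracy
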
Example #14
def main():
    # check cuda
    device = f'cuda:{args.gpu}' if torch.cuda.is_available() and args.gpu >= 0 else 'cpu'
    # load data
    dataset = DglNodePropPredDataset(name=args.dataset)
    evaluator = Evaluator(name=args.dataset)

    split_idx = dataset.get_idx_split()
    g, labels = dataset[0] # graph: DGLGraph object, label: torch tensor of shape (num_nodes, num_tasks)
    
    if args.dataset == 'ogbn-arxiv':
        g = dgl.to_bidirected(g, copy_ndata=True)
        
        feat = g.ndata['feat']
        feat = (feat - feat.mean(0)) / feat.std(0)
        g.ndata['feat'] = feat

    g = g.to(device)
    feats = g.ndata['feat']
    labels = labels.to(device)

    # load masks for train / validation / test
    train_idx = split_idx["train"].to(device)
    valid_idx = split_idx["valid"].to(device)
    test_idx = split_idx["test"].to(device)

    n_features = feats.size()[-1]
    n_classes = dataset.num_classes
    
    # load model
    if args.model == 'mlp':
        model = MLP(n_features, args.hid_dim, n_classes, args.num_layers, args.dropout)
    elif args.model == 'linear':
        model = MLPLinear(n_features, n_classes)
    else:
        raise NotImplementedError(f'Model {args.model} is not supported.')

    model = model.to(device)
    print(f'Model parameters: {sum(p.numel() for p in model.parameters())}')

    if args.pretrain:
        print('---------- Before ----------')
        model.load_state_dict(torch.load(f'base/{args.dataset}-{args.model}.pt'))
        model.eval()

        y_soft = model(feats).exp()

        y_pred = y_soft.argmax(dim=-1, keepdim=True)
        valid_acc = evaluate(y_pred, labels, valid_idx, evaluator)
        test_acc = evaluate(y_pred, labels, test_idx, evaluator)
        print(f'Valid acc: {valid_acc:.4f} | Test acc: {test_acc:.4f}')

        print('---------- Correct & Smoothing ----------')
        cs = CorrectAndSmooth(num_correction_layers=args.num_correction_layers,
                              correction_alpha=args.correction_alpha,
                              correction_adj=args.correction_adj,
                              num_smoothing_layers=args.num_smoothing_layers,
                              smoothing_alpha=args.smoothing_alpha,
                              smoothing_adj=args.smoothing_adj,
                              autoscale=args.autoscale,
                              scale=args.scale)
        
        mask_idx = torch.cat([train_idx, valid_idx])
        y_soft = cs.correct(g, y_soft, labels[mask_idx], mask_idx)
        y_soft = cs.smooth(g, y_soft, labels[mask_idx], mask_idx)
        y_pred = y_soft.argmax(dim=-1, keepdim=True)
        valid_acc = evaluate(y_pred, labels, valid_idx, evaluator)
        test_acc = evaluate(y_pred, labels, test_idx, evaluator)
        print(f'Valid acc: {valid_acc:.4f} | Test acc: {test_acc:.4f}')
    else:
        opt = optim.Adam(model.parameters(), lr=args.lr)

        best_acc = 0
        best_model = copy.deepcopy(model)

        # training
        print('---------- Training ----------')
        for i in range(args.epochs):

            model.train()
            opt.zero_grad()

            logits = model(feats)
            
            train_loss = F.nll_loss(logits[train_idx], labels.squeeze(1)[train_idx])
            train_loss.backward()

            opt.step()
            
            model.eval()
            with torch.no_grad():
                logits = model(feats)
                
                y_pred = logits.argmax(dim=-1, keepdim=True)

                train_acc = evaluate(y_pred, labels, train_idx, evaluator)
                valid_acc = evaluate(y_pred, labels, valid_idx, evaluator)

                print(f'Epoch {i} | Train loss: {train_loss.item():.4f} | Train acc: {train_acc:.4f} | Valid acc {valid_acc:.4f}')

                if valid_acc > best_acc:
                    best_acc = valid_acc
                    best_model = copy.deepcopy(model)
        
        # testing & saving model
        print('---------- Testing ----------')
        best_model.eval()
        
        logits = best_model(feats)
        
        y_pred = logits.argmax(dim=-1, keepdim=True)
        test_acc = evaluate(y_pred, labels, test_idx, evaluator)
        print(f'Test acc: {test_acc:.4f}')

        if not os.path.exists('base'):
            os.makedirs('base')

        torch.save(best_model.state_dict(), f'base/{args.dataset}-{args.model}.pt')
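
The evaluate helper used above is not shown; with the OGB Evaluator, accuracy over a node subset is typically computed like this (a sketch under that assumption):

def evaluate(y_pred, labels, idx, evaluator):
    # Restrict predictions and ground truth to the given node indices and let the OGB evaluator score them.
    return evaluator.eval({
        'y_true': labels[idx],
        'y_pred': y_pred[idx],
    })['acc']
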
Example #15
class PolicyGradient:
    def __init__(self,
                 state_dim,
                 device='cpu',
                 gamma=0.99,
                 lr=0.01,
                 batch_size=5):
        self.gamma = gamma
        self.policy_net = MLP(state_dim)
        self.optimizer = torch.optim.RMSprop(self.policy_net.parameters(),
                                             lr=lr)
        self.batch_size = batch_size

    def choose_action(self, state):

        state = torch.from_numpy(state).float()
        state = Variable(state)
        probs = self.policy_net(state)
        m = Bernoulli(probs)
        action = m.sample()

        action = action.data.numpy().astype(int)[0]  # convert to a scalar
        return action

    def update(self, reward_pool, state_pool, action_pool):
        # Discount reward
        running_add = 0
        for i in reversed(range(len(reward_pool))):
            if reward_pool[i] == 0:
                running_add = 0
            else:
                running_add = running_add * self.gamma + reward_pool[i]
                reward_pool[i] = running_add

        # Normalize reward
        reward_mean = np.mean(reward_pool)
        reward_std = np.std(reward_pool)
        for i in range(len(reward_pool)):
            reward_pool[i] = (reward_pool[i] - reward_mean) / reward_std

        # Gradient descent
        self.optimizer.zero_grad()

        for i in range(len(reward_pool)):
            state = state_pool[i]
            action = Variable(torch.FloatTensor([action_pool[i]]))
            reward = reward_pool[i]

            state = Variable(torch.from_numpy(state).float())
            probs = self.policy_net(state)
            m = Bernoulli(probs)
            loss = -m.log_prob(
                action) * reward  # Negative score function x reward
            # print(loss)
            loss.backward()
        self.optimizer.step()

    def save_model(self, path):
        torch.save(self.policy_net.state_dict(), path)

    def load_model(self, path):
        self.policy_net.load_state_dict(torch.load(path))
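
PolicyGradient assumes MLP(state_dim) returns a single Bernoulli probability, since choose_action wraps the network output in Bernoulli. A sketch of such a policy network (an assumption about the project's model, not its actual definition):

import torch.nn as nn


class MLP(nn.Module):
    # Hypothetical policy head: state vector -> probability of taking action 1.
    def __init__(self, state_dim, hidden_dim=36):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(state_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, 1),
            nn.Sigmoid(),
        )

    def forward(self, x):
        return self.net(x)
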
Example #16
def train(FLAGS):
    """
    Train our embeddings.
    """

    # Get data loaders
    print ("==> Reading and processing the data ... ", end="")
    train_loader, test_loader, num_unique_words, \
        num_unique_documents, word_to_idx = process_data(
            data_dir=FLAGS.data_dir,
            vocab_size=FLAGS.vocab_size,
            window_size=FLAGS.window_size,
            split_ratio=FLAGS.split_ratio,
            batch_size=FLAGS.batch_size,
            )
    print ("[COMPLETE]")

    # Load pretrained GloVe embeddings for our vocab
    embedding_dir = os.path.join(basedir, "../../../../embeddings/glove")
    embedding_dim = 100
    embeddings = get_embeddings(
        embedding_dir=embedding_dir,
        embedding_dim=embedding_dim,
        words=word_to_idx.keys(),
        )

    # Initialize model, criterion, loss
    print ("==> Initializing model components ... ", end="")
    model = MLP(
        D_in_words=num_unique_words,
        D_in_documents=num_unique_documents,
        embedding_dim=FLAGS.embedding_dim,
        num_hidden_units=FLAGS.num_hidden_units,
        window_size=FLAGS.window_size,
        embeddings=embeddings,
        )
    # Objective
    criterion = torch.nn.CrossEntropyLoss()
    # Optimizer
    # Only get the parameters with gradients (we freeze our GloVe embeddings)
    parameters = filter(lambda param: param.requires_grad, model.parameters())
    optimizer = torch.optim.Adam(parameters, lr=FLAGS.lr)
    print ("[COMPLETE]")

    # Train the model
    print ("==> Training the model ... [IN PROGRESS]")
    model = training_procedure(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        train_loader=train_loader,
        test_loader=test_loader,
        num_epochs=FLAGS.num_epochs,
        learning_rate=FLAGS.lr,
        decay_rate=FLAGS.decay_rate,
        max_grad_norm=FLAGS.max_grad_norm,
        log_every=FLAGS.log_every,
        )
    print ("\n[COMPLETE]")

    # Save the model
    print ("==> Saving the model ... [IN PROGRESS]")
    torch.save(model, os.path.join(basedir, FLAGS.data_dir, "model.pt"))
    print ("\n[COMPLETE]")
Example #17
class NonLocalTrainer(object):
    def __init__(self, args,
                 trainLoader, testLoader):

        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

        self.out_path = args.out
        self.sigma = args.sigma
        self.beta = args.beta
        self.nClass = args.nClass

        self.model = MLP().to(self.device)
        self.optim = torch.optim.Adam(self.model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
        self.criterion = nn.MSELoss()

        self.trainLoader = trainLoader
        self.testLoader = testLoader

        self.run_datetime = datetime.datetime.now()

        if not os.path.exists(self.out_path):
            os.makedirs(self.out_path)

        self.logger = Logger(self.out_path)

        with open(os.path.join(self.out_path, "para.json"), "w") as f:
            json.dump(args.__dict__, f)

        self.epoch = 0
        self.iteration = 0
        self.test_step = 0
        self.max_epoch = args.epochs
        self.val_interval = args.interval
        self.res = 0
        self.best_error = 1e7
        self.best_res_epoch = 0

        self.noiseMean = torch.zeros(args.batch_size, args.featureNums, 17, 17)
        self.noiseStd = torch.div(torch.ones(args.batch_size, args.featureNums, 17, 17), 1e3)

    def validate_one_epoch(self):
        self.model.eval()
        self.test_step += 1

        tsthreas = [0.1, 1, 10]

        tp = [0] * len(tsthreas)  # true positive
        tn = [0] * len(tsthreas)  # true negative
        fp = [0] * len(tsthreas)  # false positive
        fn = [0] * len(tsthreas)  # false negative
        ts = [0] * len(tsthreas)

        totalRegressionLoss = []
        total_error = 0
        total_count = 0
        p_error = 0
        p_count = 0

        largeGapCount = 0
        largeGap = 0

        for batch_idx, (data, target, _, _, _, _) in tqdm.tqdm(
                enumerate(self.testLoader), total=len(self.testLoader),
                desc='Valid :', ncols=80,
                leave=False):
            gt_micaps = target.numpy()
            data, target = data.to(device=self.device), target.to(device=self.device)

            with torch.no_grad():

                predictValues = self.model(data)

                regressionLoss = self.criterion(predictValues, target)

                predictNumpy = predictValues.cpu().numpy()
                totalRegressionLoss.append(regressionLoss.item())
                # totalClassificationLoss.append(classificationLoss.item())

                # predicted = torch.argmax(preds, dim=1)
                # correct += (predicted == logits).sum().item()

                gapValues = np.abs(predictNumpy - gt_micaps)
                total_error += np.sum(gapValues)
                total_count += gt_micaps.shape[0]
                p_error += np.sum((gt_micaps > 0.01) * gapValues)
                p_count += np.sum(gt_micaps > 0.01)

                largeGap += np.sum((gapValues > 5) * gapValues)
                largeGapCount += np.sum(gapValues > 5)

                for i, threas in enumerate(tsthreas):
                    tp[i] += np.sum((gt_micaps >= threas) * (predictNumpy >= threas))
                    tn[i] += np.sum((gt_micaps < threas) * (predictNumpy < threas))
                    fp[i] += np.sum((gt_micaps < threas) * (predictNumpy >= threas))
                    fn[i] += np.sum((gt_micaps >= threas) * (predictNumpy < threas))

        for i, _ in enumerate(tsthreas):
            ts[i] += round(tp[i] / (tp[i] + fp[i] + fn[i]), 5)

        totalAverageError = round(total_error / total_count, 5)
        pAverageError = round(p_error / p_count, 5)
        totalLoss = np.sum(totalRegressionLoss)
        largeGapRatio = round(largeGapCount / total_count, 5)
        largeGapMae = round(largeGap / largeGapCount, 5)

        info = {"test_regression_loss": totalLoss,
                "ts_score": ts,
                "aver_gap": totalAverageError,
                "aver_p_gap": pAverageError,
                "large_gap_ratio": largeGapRatio,
                "large_gap_mae": largeGapMae
                }
        print("========================== Epoch {} Test Result Show ==========================".format(self.epoch + 1))

        print(info)

        # for tag, value in info.items():
        #     self.logger.scalar_summary(tag, value, self.test_step)

        # if totalAverageError < self.best_error:
        #     self.best_error = totalAverageError
        #     self.best_res_epoch = self.epoch
        #     info["epoch"] = self.epoch
        #     info["modelParam"] = self.model.state_dict()
        #     info["optimParam"] = self.optim.state_dict()
        #     torch.save(info, os.path.join(self.out_path, str(self.epoch) + "_checkpoints.pth"))

    def train_one_epoch(self):
        self.model.train()

        for batch_idx, (data, target, _, _, _, _) in tqdm.tqdm(
                enumerate(self.trainLoader), total=len(self.trainLoader),
                desc='Train epoch=%d' % self.epoch, ncols=80, leave=False):
            iter_idx = batch_idx + self.epoch * len(self.trainLoader)
            # if (self.iteration != 0) and (iter_idx - 1) != self.iteration:
            #     continue
            self.iteration = iter_idx

            assert self.model.training
            self.optim.zero_grad()

            data = data.to(device=self.device)
            target = target.to(device=self.device)

            predictValues = self.model(data)

            regressionLoss = self.criterion(predictValues, target)

            regressionLoss.backward()
            # for named,param in self.model.named_parameters():
            #     print("Name : " ,named)
            #     print(param.grad.data.sum())
            self.optim.step()

            regressionLossCpu = regressionLoss.item()
            self.logger.scalar_summary("train_regression_loss", regressionLossCpu, self.iteration + 1)

        for tag, value in self.model.named_parameters():
            self.logger.histo_summary(tag, value.data.cpu().numpy(), self.epoch + 1)
            self.logger.histo_summary(tag + '/grad', value.grad.data.cpu().numpy(), self.epoch + 1)

    def run(self):
        for epoch in range(self.max_epoch):
            self.epoch = epoch
            self.train_one_epoch()
            if (self.epoch + 1) % self.val_interval == 0:
                self.validate_one_epoch()
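
Logger is a project-specific TensorBoard wrapper exposing scalar_summary and histo_summary; a minimal stand-in built on torch.utils.tensorboard (an assumption about its interface) could be:

from torch.utils.tensorboard import SummaryWriter


class Logger:
    def __init__(self, log_dir):
        self.writer = SummaryWriter(log_dir)

    def scalar_summary(self, tag, value, step):
        # Log a single scalar value.
        self.writer.add_scalar(tag, value, step)

    def histo_summary(self, tag, values, step):
        # Log a histogram of values (e.g. weights or gradients).
        self.writer.add_histogram(tag, values, step)
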
Example #18
X = (np.random.rand(10).reshape(-1, 1) - 1) / 2  # x between -0.5 and 0.
Y = f(X)
X = torch.from_numpy(X).type(torch.FloatTensor)
Y = torch.from_numpy(Y).type(torch.FloatTensor)

dataset = RegressionDataset(X, Y)

# Reproducibility
if args.seed is not None:
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)
    random.seed(args.seed)

net = MLP()
criterion = nn.MSELoss()
optimizer = optim.Adam(net.parameters(), lr=args.lr, weight_decay=args.wd)

# Load reference net if defined
if args.repulsive is not None:
    reference_net = model.MLP(dropout_rate=args.dropout_rate)
    reference_net.load_state_dict(torch.load(Path(args.repulsive)))

# Update of the network parameters
train_loader = DataLoader(dataset, batch_size=args.batch_size, shuffle=False)

# Sampling a repulsive bandwidth parameter
alpha = -3
beta = -0.5
bandwidth_repulsive = float(10**(alpha + (beta - alpha) * np.random.rand()))

# Preparation of the optimization
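
f and RegressionDataset are defined elsewhere in that project; simple stand-ins consistent with how the snippet uses them (a 1-D target function and a tensor-pair Dataset, both hypothetical) might be:

import numpy as np
from torch.utils.data import Dataset


def f(x):
    # Hypothetical target function for the toy 1-D regression task.
    return np.sin(12 * x) + 0.1


class RegressionDataset(Dataset):
    def __init__(self, X, Y):
        self.X = X
        self.Y = Y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.Y[idx]
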
Example #19
            data_set_sizes.append(args.subset_fraction)
            cloned_outputs.append(None)

        # create training and validation loaders
        session_train_loader = torch.utils.data.DataLoader(
            data,
            batch_size=args.batch_size,
            sampler=torch.utils.data.SubsetRandomSampler(
                sum(session_train_ids, [])))
        session_val_loader = torch.utils.data.DataLoader(
            data,
            batch_size=args.batch_size,
            sampler=torch.utils.data.SubsetRandomSampler(
                sum(session_val_ids, [])))

        optimizer = torch.optim.Adam(params=model.parameters(), lr=args.lr)

        model_path_base = "base_{}".format(model_path)

        # joint training on all base tasks
        train_loss, val_loss = train_model(model,
                                           criterion,
                                           optimizer,
                                           session_train_loader,
                                           session_val_loader,
                                           task_id_dict=task_id_dict,
                                           outpath=model_path_base,
                                           device=device,
                                           store_model_internally=True)

        # update omega values
Example #20
    plt.show()

    # 						--------------
    # --------------------- building model
    # 						--------------
    #
    print(f"\t✅ building {args.network} model\n")
    # ---------------------------------------------
    if args.network == "mlp":
        net = MLP(input_neurons=mini_batch_inputs.shape[2]**2,
                  output_neurons=mini_batch_labels.shape[1],
                  learning_rate=args.learning_rate)
    elif args.network == "cnn":
        net = CNN(input_channels=mini_batch_inputs.shape[1],
                  output_neurons=mini_batch_labels.shape[1])
        optimizer = optim.Adam(net.parameters(), args.learning_rate)
    else:
        print("[❌] Network Not Supported!")
        sys.exit(1)

    # 						-------------------------------
    # --------------------- training and evaluating process
    # 						-------------------------------
    #
    print(f"\t✅ start training and evaluating process\n")
    # -----------------------------------------------------------
    valid_loss_min = np.Inf
    criterion = torch.nn.CrossEntropyLoss()
    start_time = time.time()
    history = {
        "train_loss": [],
Example #21
        best_model = None

        _dev_data_loader = DataLoader(dev_data, batch_size=32, shuffle=False)

        for idx, params in enumerate(HYPER_PARA):
            BATCH_SIZE, DROP_RT, LR, EPOCHS = params
            best_roc = []
            best_prc = []

            data_loader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)

            for run in range(1):
                model = MLP(NUM_ELEM, EMBEDDING_DIM,  HIDDEN_DIM_ADD_ON, HIDDEN_DIM, NUM_CLS, NUM_LYS, ADD_ON_FEATS, 100,
                            DROP_RT)
                loss_func = nn.CrossEntropyLoss()
                optimizer = optim.Adam(model.parameters(), lr=LR)

                device = torch.device('cuda:0')
                model.to(device)

                last_roc = -1
                last_prc = -1
                epochs_no_imprv = 0
                for epoch in range(EPOCHS):
                    model.train()
                    epoch_loss = 0
                    batch = tqdm(data_loader)
                    for elem, label, lengths, feats in batch:
                        optimizer.zero_grad()
                        prediction = model(elem, lengths, feats)
                        # loss = torch.mean(F.cross_entropy(prediction, label, reduction='none')
Example #22
def black_box_function(opt_param):
    mean_pure_ratio1 = 0
    mean_pure_ratio2 = 0

    print('building model...')
    cnn1 = MLP(n_outputs=num_classes)
    cnn1.cuda()
    print(cnn1.parameters)
    optimizer1 = torch.optim.Adam(cnn1.parameters(), lr=learning_rate)

    cnn2 = MLP(n_outputs=num_classes)
    cnn2.cuda()
    print(cnn2.parameters)
    optimizer2 = torch.optim.Adam(cnn2.parameters(), lr=learning_rate)

    rate_schedule = opt_param.copy()
    print('Schedule:', rate_schedule)

    epoch = 0
    train_acc1 = 0
    train_acc2 = 0
    # evaluate models with random weights
    test_acc1, test_acc2 = evaluate(test_loader, cnn1, cnn2)
    print(
        'Epoch [%d/%d] Test Accuracy on the %s test images: Model1 %.4f %% Model2 %.4f %% Pure Ratio1 %.4f %% Pure Ratio2 %.4f %%'
        % (epoch + 1, args.n_epoch, len(test_dataset), test_acc1, test_acc2,
           mean_pure_ratio1, mean_pure_ratio2))
    # save results
    with open(txtfile, "a") as myfile:
        myfile.write(
            str(int(epoch)) + ' ' + str(train_acc1) + ' ' + str(train_acc2) +
            ' ' + str(test_acc1) + " " + str(test_acc2) + ' ' +
            str(mean_pure_ratio1) + ' ' + str(mean_pure_ratio2) + ' ' +
            str(rate_schedule[epoch]) + "\n")

    # training
    for epoch in range(1, args.n_epoch):
        # train models
        cnn1.train()
        adjust_learning_rate(optimizer1, epoch)
        cnn2.train()
        adjust_learning_rate(optimizer2, epoch)
        train_acc1, train_acc2, pure_ratio_1_list, pure_ratio_2_list = train(
            train_loader, epoch, cnn1, optimizer1, cnn2, optimizer2,
            rate_schedule)
        # evaluate models
        test_acc1, test_acc2 = evaluate(test_loader, cnn1, cnn2)
        # save results
        mean_pure_ratio1 = sum(pure_ratio_1_list) / len(pure_ratio_1_list)
        mean_pure_ratio2 = sum(pure_ratio_2_list) / len(pure_ratio_2_list)
        print(
            'Epoch [%d/%d] Test Accuracy on the %s test images: Model1 %.4f %% Model2 %.4f %%, Pure Ratio 1 %.4f %%, Pure Ratio 2 %.4f %%'
            % (epoch + 1, args.n_epoch, len(test_dataset), test_acc1,
               test_acc2, mean_pure_ratio1, mean_pure_ratio2))
        with open(txtfile, "a") as myfile:
            myfile.write(
                str(int(epoch)) + ' ' + str(train_acc1) + ' ' +
                str(train_acc2) + ' ' + str(test_acc1) + " " + str(test_acc2) +
                ' ' + str(mean_pure_ratio1) + ' ' + str(mean_pure_ratio2) +
                ' ' + str(rate_schedule[epoch]) + "\n")

    return (test_acc1 + test_acc2) / 200
Example #23
class DQN:
    def __init__(self,
                 n_states,
                 n_actions,
                 gamma=0.99,
                 epsilon_start=0.9,
                 epsilon_end=0.05,
                 epsilon_decay=200,
                 memory_capacity=10000,
                 policy_lr=0.01,
                 batch_size=128,
                 device="cpu"):

        self.n_actions = n_actions  # total number of actions
        self.device = device  # device: cpu or gpu
        self.gamma = gamma  # discount factor for rewards
        # parameters of the epsilon-greedy policy
        self.actions_count = 0  # step counter used for epsilon decay
        self.epsilon = 0
        self.epsilon_start = epsilon_start
        self.epsilon_end = epsilon_end
        self.epsilon_decay = epsilon_decay
        self.batch_size = batch_size
        self.policy_net = MLP(n_states, n_actions).to(self.device)
        self.target_net = MLP(n_states, n_actions).to(self.device)
        # target_net starts as an exact copy of policy_net's parameters
        self.target_net.load_state_dict(self.policy_net.state_dict())
        self.target_net.eval()  # do not enable BatchNormalization or Dropout
        # note the difference between parameters() and state_dict(): the former has requires_grad=True
        self.optimizer = optim.Adam(self.policy_net.parameters(), lr=policy_lr)
        self.loss = 0
        self.memory = ReplayBuffer(memory_capacity)

    def choose_action(self, state, train=True):
        '''Choose an action.
        '''
        if train:
            self.epsilon = self.epsilon_end + (self.epsilon_start - self.epsilon_end) * \
                math.exp(-1. * self.actions_count / self.epsilon_decay)
            self.actions_count += 1
            if random.random() > self.epsilon:
                with torch.no_grad():
                    # first convert to a tensor so it can be fed to the network; state elements are float64 by default
                    # note: state=torch.tensor(state).unsqueeze(0) is equivalent to state=torch.tensor([state])
                    state = torch.tensor([state],
                                         device=self.device,
                                         dtype=torch.float32)
                    # e.g. tensor([[-0.0798, -0.0079]], grad_fn=<AddmmBackward>)
                    q_value = self.policy_net(state)
                    # tensor.max(1) returns each row's maximum value and its index,
                    # e.g. torch.return_types.max(values=tensor([10.3587]),indices=tensor([0]))
                    # so tensor.max(1)[1] gives the index of the maximum, i.e. the action
                    action = q_value.max(1)[1].item()
            else:
                action = random.randrange(self.n_actions)
            return action
        else:
            with torch.no_grad():  # no gradients need to be tracked here
                # first convert to a tensor so it can be fed to the network; state elements are float64 by default
                # note: state=torch.tensor(state).unsqueeze(0) is equivalent to state=torch.tensor([state])
                state = torch.tensor(
                    [state], device='cpu', dtype=torch.float32
                )  # e.g. tensor([[-0.0798, -0.0079]], grad_fn=<AddmmBackward>)
                q_value = self.target_net(state)
                # tensor.max(1) returns each row's maximum value and its index,
                # e.g. torch.return_types.max(values=tensor([10.3587]),indices=tensor([0]))
                # so tensor.max(1)[1] gives the index of the maximum, i.e. the action
                action = q_value.max(1)[1].item()
            return action

    def update(self):

        if len(self.memory) < self.batch_size:
            return
        # randomly sample a batch of transitions from memory
        state_batch, action_batch, reward_batch, next_state_batch, done_batch = self.memory.sample(
            self.batch_size)
        '''Convert to tensors,
        e.g. tensor([[-4.5543e-02, -2.3910e-01,  1.8344e-02,  2.3158e-01],...,[-1.8615e-02, -2.3921e-01, -1.1791e-02,  2.3400e-01]])'''
        state_batch = torch.tensor(state_batch,
                                   device=self.device,
                                   dtype=torch.float)
        action_batch = torch.tensor(action_batch,
                                    device=self.device).unsqueeze(
                                        1)  # e.g. tensor([[1],...,[0]])
        reward_batch = torch.tensor(
            reward_batch, device=self.device,
            dtype=torch.float)  # tensor([1., 1.,...,1])
        next_state_batch = torch.tensor(next_state_batch,
                                        device=self.device,
                                        dtype=torch.float)
        done_batch = torch.tensor(np.float32(done_batch),
                                  device=self.device).unsqueeze(
                                      1)  # convert bools to floats, then to a tensor
        '''Compute Q(s_t, a) for the current (s_t, a).'''
        '''torch.gather: for a=torch.Tensor([[1,2],[3,4]]), a.gather(1,torch.Tensor([[0],[1]])) = torch.Tensor([[1],[3]])'''
        q_values = self.policy_net(state_batch).gather(
            dim=1, index=action_batch)  # equivalent to calling self.forward
        # compute V(s_{t+1}) for all next states, i.e. take the max Q-value from target_net
        next_state_values = self.target_net(next_state_batch).max(
            1)[0].detach()  # e.g. tensor([ 0.0060, -0.0171,...,])
        # compute expected_q_value
        # for terminal states done_batch[0] = 1, so the expected_q_value equals the reward
        expected_q_values = reward_batch + self.gamma * \
            next_state_values * (1-done_batch[0])
        # self.loss = F.smooth_l1_loss(q_values,expected_q_values.unsqueeze(1))  # Huber loss
        self.loss = nn.MSELoss()(q_values,
                                 expected_q_values.unsqueeze(1))  # mean squared error loss
        # optimize the model
        self.optimizer.zero_grad()  # clear all the stale gradients from the last step
        # loss.backward() uses backpropagation to compute the gradient of the loss w.r.t. every parameter that requires gradients
        self.loss.backward()
        for param in self.policy_net.parameters():  # clip to prevent exploding gradients
            param.grad.data.clamp_(-1, 1)

        self.optimizer.step()  # update the model

    def save_model(self, path):
        torch.save(self.target_net.state_dict(), path)

    def load_model(self, path):
        self.target_net.load_state_dict(torch.load(path))
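
ReplayBuffer is not shown; a minimal ring-buffer sketch whose sample() returns the five per-field batches that update() unpacks (an assumption about the interface):

import random


class ReplayBuffer:
    def __init__(self, capacity):
        self.capacity = capacity
        self.buffer = []
        self.position = 0

    def push(self, state, action, reward, next_state, done):
        # Overwrite the oldest transition once the buffer is full.
        if len(self.buffer) < self.capacity:
            self.buffer.append(None)
        self.buffer[self.position] = (state, action, reward, next_state, done)
        self.position = (self.position + 1) % self.capacity

    def sample(self, batch_size):
        batch = random.sample(self.buffer, batch_size)
        # Transpose the list of transitions into per-field tuples.
        state, action, reward, next_state, done = zip(*batch)
        return state, action, reward, next_state, done

    def __len__(self):
        return len(self.buffer)
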
Example #24
                                  config.batch_size,
                                  shuffle=False,
                                  num_workers=2)
print(f"{datetime.now().ctime()} - Finish Loading Dataset")

print(
    f"{datetime.now().ctime()} - Start Creating Net, Criterion, Optimizer and Scheduler..."
)
if config.model == "mlp":
    net = MLP(config.cifar10_input_size, config.num_classes)
elif config.model == "convnet":
    net = ConvNet(config.input_channel, config.num_classes)
elif config.model == "onelayer":
    net = OneLayer(config.fashionmnist_input_size, config.num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(),
                      config.lr,
                      momentum=config.momentum,
                      weight_decay=config.weight_decay)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer,
                                                 len(train_dataloader) *
                                                 config.epochs,
                                                 eta_min=config.eta_min)
print(
    f"{datetime.now().ctime()} - Finish Creating Net, Criterion, Optimizer and Scheduler"
)

print(f"{datetime.now().ctime()} - Start Training...")
print(
    f"Traing dataset: {len(train_dataset)}, iteration: {len(train_dataloader)}"
)
Example #25
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
    # CLASSIFIER
    if args.use_conv:
        if args.imprint:

            model = ResNet18_imprint(num_classes=args.n_tasks*5)
            model.seen_classes = []
        else:
            model = ResNet18(args.n_classes, nf=20, input_size=args.input_size)
    else:
        model = MLP(args)
    if args.cuda:
        model = model.to(args.device)

    opt = torch.optim.SGD(model.parameters(), lr=args.lr)
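    # Buffer(args) appears to hold the rehearsal memory; its bx tensor is counted below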
    buffer = Buffer(args)
    if run == 0:
        print("number of classifier parameters:",
                sum([np.prod(p.size()) for p in model.parameters()]))
        print("buffer parameters: ", np.prod(buffer.bx.size()))

    #----------
    # Task Loop

    for task, tr_loader in enumerate(train_loader):

        sample_amt = 0

        model = model.train()
Example #26
0
def run():
    print(f'Running from {os.getcwd()}')
    train_config, val_config = get_split_configs()
    print(f'Running with\n\ttrain_config: {train_config}\n\tval_config: {val_config}')

    train = AugMNISTDataset(transforms=['color'], config=train_config)
    val = AugMNISTDataset(transforms=['color'], config=val_config)
    train_dataloader = torch.utils.data.DataLoader(train, shuffle=True, batch_size=args.batch_size, num_workers=8)
    val_dataloader = torch.utils.data.DataLoader(val, shuffle=True, batch_size=1000, num_workers=0)

    mlp_width = 512
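    # e_model is trained on the entangled features, d_model on the plain features;
    # the L0 variants additionally return a sparsity penalty (used as l0_e / l0_d below)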
    if args.use_l0:
        e_model = L0MLP(args.n_hidden, args.input_dim, mlp_width, 1).to(args.device)
        d_model = L0MLP(args.n_hidden, args.input_dim, mlp_width, 1).to(args.device)
    else:
        e_model = MLP(args.n_hidden, args.input_dim, mlp_width, 1).to(args.device)
        d_model = MLP(args.n_hidden, args.input_dim, mlp_width, 1).to(args.device)


    #summary(e_model, (13,))
    #summary(d_model, (13,))

    if args.optimizer == 'sgd':
        e_opt = torch.optim.SGD(e_model.parameters(), momentum=0.9, lr=args.lr)
        d_opt = torch.optim.SGD(d_model.parameters(), momentum=0.9, lr=args.lr)
    elif args.optimizer == 'adam':
        e_opt = torch.optim.Adam(e_model.parameters(), lr=args.lr)
        d_opt = torch.optim.Adam(d_model.parameters(), lr=args.lr)
    step = 0
    task = generate_task()
    decay_epochs = [60,90,120,150]
    e_sched = torch.optim.lr_scheduler.MultiStepLR(e_opt, milestones=decay_epochs, gamma=0.1)
    d_sched = torch.optim.lr_scheduler.MultiStepLR(d_opt, milestones=decay_epochs, gamma=0.1)
    for epoch in range(args.epochs):
        for idx, samples in enumerate(train_dataloader):
            features = get_features(samples).to(args.device)
            entangled_features = get_features(samples, entangle=True).to(args.device)
            labels = get_labels(samples, task).to(args.device)

            if args.use_l0:
                e_out, l0_e  = e_model(entangled_features)
                d_out, l0_d = d_model(features)
            else:
                e_out = e_model(entangled_features)
                d_out = d_model(features)

            e_pred = e_out > 0
            e_acc = (e_pred == labels).float().mean()
            d_pred = d_out > 0
            d_acc = (d_pred == labels).float().mean()

            e_bce = F.binary_cross_entropy_with_logits(e_out, labels)
            e_loss = e_bce
            d_bce = F.binary_cross_entropy_with_logits(d_out, labels)
            d_loss = d_bce

            # L0
            if args.use_l0:
                l0_coef = 1e-1
                d_loss += l0_coef * l0_d / len(samples)
                e_loss += l0_coef * l0_e / len(samples)

            # L1
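            # hold the L1 coefficient at warmup_l1 early, then ramp it up once epoch passes rampup_begin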
            if epoch <= args.rampup_begin:
                l1_coef = args.warmup_l1
            else:
                l1_coef = args.warmup_l1 + args.l1 / (args.warmup_l1 + args.l1) * min(args.l1, args.l1 * (float(epoch) - args.rampup_begin) / (args.rampup_end-args.rampup_begin))

            d_loss += l1_coef * l1(d_model)
            e_loss += l1_coef * l1(e_model)

            e_loss.backward()
            e_grad = torch.nn.utils.clip_grad_norm_(e_model.parameters(), 100)
            e_opt.step()
            e_opt.zero_grad()

            d_loss.backward()
            d_grad = torch.nn.utils.clip_grad_norm_(d_model.parameters(), 100)
            d_opt.step()
            d_opt.zero_grad()

            if step % 250 == 0:
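                # every 250 steps: log training stats, score a held-out validation batch, and save a checkpoint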
                stats = {}
                stats['step'] = step
                stats['train_acc/e'], stats['train_acc/d']  = e_acc, d_acc
                stats['train_loss/e'], stats['train_loss/d']  = e_loss, d_loss
                stats['train_bce/e'], stats['train_bce/d']  = e_bce, d_bce

                if args.warmup_l1 + args.l1 > 0:
                    stats['l1_coef'] = l1_coef

                d_nonzero, d_params = nonzero_params(d_model)
                e_nonzero, e_params = nonzero_params(e_model)
                stats['d_nonzero'], stats['e_nonzero'] = d_nonzero, e_nonzero

                with torch.no_grad():
                    val_samples = next(iter(val_dataloader))
                    val_features = get_features(val_samples).to(args.device)
                    val_entangled_features = get_features(val_samples, entangle=True).to(args.device)
                    val_labels = get_labels(val_samples, task)

                    if args.use_l0:
                        e_out = copy_and_zero(e_model)(val_entangled_features)[0].cpu()
                        d_out = copy_and_zero(d_model)(val_features)[0].cpu()
                    else:
                        e_out = copy_and_zero(e_model)(val_entangled_features).cpu()
                        d_out = copy_and_zero(d_model)(val_features).cpu()

                    stats['val_auc/e'] = metrics.roc_auc_score(val_labels, e_out)
                    stats['val_auc/d'] = metrics.roc_auc_score(val_labels, d_out)
                    stats['lr/e'], stats['lr/d'] = e_sched.get_last_lr()[0], d_sched.get_last_lr()[0]

                    e_pred = e_out > 0
                    e_acc = (e_pred == val_labels).float().mean()
                    d_pred = d_out > 0
                    d_acc = (d_pred == val_labels).float().mean()

                    stats['val_acc/e'], stats['val_acc/d'] = e_acc, d_acc

                    # Fetch k wrong predictions
                    #k = 10
                    #e_wrong_mask = [e_pred != val_labels]
                    #d_wrong_mask = [d_pred != val_labels]
                    #wrong_preds_e, ftrs_e = e_out[e_wrong_mask][:k], val_entangled_features[:k]
                    #wrong_preds_d, ftrs_d = d_out[d_wrong_mask][:k], val_features[:k]

                to_save = {
                    'd_model': d_model.state_dict(),
                    'e_model': e_model.state_dict(),
                    'd_opt': d_opt.state_dict(),
                    'e_opt': e_opt.state_dict()
                }
                torch.save(to_save, 'checkpoint.pt')
                if args.log_wandb:
                    wandb.log(stats)
                else:
                    print_stats(stats)

            step += 1
        e_sched.step()
        d_sched.step()
Example #27
0
model.cuda()

logger = NeptuneLogger(api_token=os.getenv('NEPTUNE_API_TOKEN'),
                        project_name = "vladimir.isakov/sandbox",
                        experiment_name  = 'Run',
                        upload_source_files='./train.py',
                        #tags = 'v1',
                        params = {'batch_size': args.batch_size,
                                    'epochs': args.epochs,
                                    'lr': args.lr,
                                    'step_size': args.step_size,
                                    'gamma': args.gamma,
                                    'weight_decay': args.weight_decay,
                                    'model': repr(model)})

optimizer = torch.optim.Adam(model.parameters(),
                             lr=args.lr,
                             weight_decay=args.weight_decay)

step_scheduler = StepLR(optimizer,
                        step_size=args.step_size,
                        gamma=args.gamma)

scheduler = LRScheduler(step_scheduler)

criterion = nn.CrossEntropyLoss()


def update(engine, batch):

    inputs, targets = batch
Example #28
0
        val_dataset,
        batch_size=batch_size
    )
    print('Data Loaded!')

    '''Step 2: Model Initialization'''
    #Model
    model = MLP(input_dim, output_dim)
    model.to(device)
    # model.load_state_dict(torch.load('14_model8.pth.tar'))

    #Loss Function
    criterion = nn.CrossEntropyLoss()

    #Optimizer
    optimizer = Adam(model.parameters(),lr=lr)

    '''Step 3: Train the Model'''
    print('Training begins: ')

    global_acc = 0
    for epoch in range(num_epoch):
        epoch = epoch+1
        print(f'Epoch {epoch} starts:')
        train_start = time.time()
        train_loss, train_acc = Train(
            train_dataloader,
            model,
            criterion,
            optimizer
        )
Example #29
0
class Agent():
    def __init__(self, test=False):
        # device
        if torch.cuda.is_available():
            self.device = torch.device('cuda')
        else :
            self.device = torch.device('cpu')
        
        self.model = MLP(state_dim=4,action_num=2,hidden_dim=256).to(self.device)  
        if test:
            self.load('./pg_best.cpt')        
        # discounted reward
        self.gamma = 0.99 
        # optimizer
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=3e-3)
        # saved rewards and actions
        self.memory = Memory()
        self.tensorboard = TensorboardLogger('./')
    def save(self, save_path):
        print('save model to', save_path)
        torch.save(self.model.state_dict(), save_path)
    def load(self, load_path):
        print('load model from', load_path)
        self.model.load_state_dict(torch.load(load_path))
    def act(self,x,test=False):
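        # training mode: sample an action stochastically and store its log-prob for the policy update;
        # test mode: sample from the policy without tracking gradients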
        if not test:
            # boring type casting
            x = ((torch.from_numpy(x)).unsqueeze(0)).float().to(self.device)
            # stochastic sample
            action_prob = self.model(x)
            dist = torch.distributions.Categorical(action_prob)
            action = dist.sample()
            # memory log_prob
            self.memory.logprobs.append(dist.log_prob(action))
            return action.item()    
        else :
            self.model.eval()
            x = ((torch.from_numpy(x)).unsqueeze(0)).float().to(self.device)
            with torch.no_grad():
                action_prob = self.model(x)
                # a = np.argmax(action_prob.cpu().numpy())
                dist = torch.distributions.Categorical(action_prob)
                action = dist.sample()
                return action.item()
    def collect_data(self, state, action, reward):
        self.memory.actions.append(action)
        self.memory.rewards.append(torch.tensor(reward))
        self.memory.states.append(state)
    def clear_data(self):
        self.memory.clear_memory()

    def update(self):
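        # REINFORCE-style update: compute discounted returns, standardise them,
        # and weight each step's negative log-probability by its return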
        R = 0
        advantage_function = []        
        for t in reversed(range(0, len(self.memory.rewards))):
            R = R * self.gamma + self.memory.rewards[t]
            advantage_function.insert(0, R)

        # turn rewards to pytorch tensor and standardize
        advantage_function = torch.Tensor(advantage_function).to(self.device)
        advantage_function = (advantage_function - advantage_function.mean()) / (advantage_function.std() + np.finfo(np.float32).eps)

        policy_loss = []
        for log_prob, reward in zip(self.memory.logprobs, advantage_function):
            policy_loss.append(-log_prob * reward)
        # Update network weights
        self.optimizer.zero_grad()
        loss = torch.cat(policy_loss).sum()
        loss.backward()
        self.optimizer.step() 
        # boring log
        self.tensorboard.scalar_summary("loss", loss.item())
        self.tensorboard.update()
Example #30
0
    
    loaders = [train_loader, valid_loader, test_loader, trainA_loader, trainB_loader, validA_loader, validB_loader]
    names = ['train_loader','valid_loader', 'test_loader',"trainA_loader", "trainB_loader", "validA_loader", "validB_loader"]
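    # sanity check: print the class distribution of the first two batches from each loader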
    for loader, name in zip(loaders, names):
        train_iter = iter(loader)
        for _ in range(2):
            _, target = next(train_iter)
            print('{}: Classes {}, counts: {}'.format(
                name, *np.unique(target.numpy(), return_counts=True)))

   
    ##############################
    ######### Baseline ###########
    ##############################
    model = MLP()
    model = model.to(device)
    for name, param in model.named_parameters():
        if param.device.type != 'cuda':
            print('param {}, not on GPU'.format(name))

    optimizer = optim.SGD(model.parameters(), lr=1e-3, momentum=0.9)
    wandb.init(
        project='Seq Boost2',
        config=config,
        name="Baseline p={} mu={} eta={}".format(P,M,E))

    model, train_loss, valid_loss = train(model, train_loader, valid_loader, batch_size=BATCH_SIZE, wandb_log=True,
                                          consolidate=False, patience=EARLY_STOPPING, n_epochs=config['epoch'])
    evaluate(model, test_loader, batch_size = BATCH_SIZE)
Example #31
0
def main():

    np.random.seed(args.seed)
    cur_acc = 0
    max_acc = 0
    num_param = 20
    cur_param = np.zeros(args.n_epoch)
    max_pt = np.zeros(args.n_epoch)
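    # random search over smooth sigmoid-shaped schedules; keep the one that maximises black_box_function (used later as rate_schedule)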
    for iii in range(args.n_iter):
        for jjj in range(args.n_samples):
            cur_a = np.random.randn(10)
            cur_w = np.random.randn(10)
            cur_b = np.random.randn(10)
            x = np.arange(args.n_epoch) / args.n_epoch
            cur_rt = np.dot(np.outer(x, cur_w) + cur_b, cur_a)
            cur_rt = 1 / (1 + np.exp(-cur_rt))
            cur_param = cur_rt.copy()
            cur_acc = black_box_function(cur_param)
            if max_acc < cur_acc:
                max_acc = cur_acc
                max_pt = cur_param.copy()
    '''
    rate_schedule=np.ones(args.n_epoch)*forget_rate
    rate_schedule[:10]=np.arange(10,dtype=float)/10*forget_rate
    # rate_schedule[10:]=np.arange(args.n_epoch-10,dtype=float)/(args.n_epoch-10)*forget_rate+forget_rate
    rate_schedule=np.zeros(args.n_epoch)
    print(rate_schedule)
    '''
    rate_schedule = max_pt.copy()
    print('Final Schedule:', rate_schedule)

    mean_pure_ratio1 = 0
    mean_pure_ratio2 = 0

    print('building model...')
    cnn1 = MLP(n_outputs=num_classes)
    cnn1.cuda()
    print(cnn1.parameters)
    optimizer1 = torch.optim.Adam(cnn1.parameters(), lr=learning_rate)

    cnn2 = MLP(n_outputs=num_classes)
    cnn2.cuda()
    print(cnn2.parameters)
    optimizer2 = torch.optim.Adam(cnn2.parameters(), lr=learning_rate)

    epoch = 0
    train_acc1 = 0
    train_acc2 = 0
    # evaluate models with random weights
    test_acc1, test_acc2 = evaluate(test_loader, cnn1, cnn2)
    print(
        'Epoch [%d/%d] Test Accuracy on the %s test images: Model1 %.4f %% Model2 %.4f %% Pure Ratio1 %.4f %% Pure Ratio2 %.4f %%'
        % (epoch + 1, args.n_epoch, len(test_dataset), test_acc1, test_acc2,
           mean_pure_ratio1, mean_pure_ratio2))
    # save results
    with open(txtfile, "a") as myfile:
        myfile.write(
            str(int(epoch)) + ' ' + str(train_acc1) + ' ' + str(train_acc2) +
            ' ' + str(test_acc1) + " " + str(test_acc2) + ' ' +
            str(mean_pure_ratio1) + ' ' + str(mean_pure_ratio2) + ' ' +
            str(rate_schedule[epoch]) + "\n")

    # training
    for epoch in range(1, args.n_epoch):
        # train models
        cnn1.train()
        adjust_learning_rate(optimizer1, epoch)
        cnn2.train()
        adjust_learning_rate(optimizer2, epoch)
        train_acc1, train_acc2, pure_ratio_1_list, pure_ratio_2_list = train(
            train_loader, epoch, cnn1, optimizer1, cnn2, optimizer2,
            rate_schedule)
        # evaluate models
        test_acc1, test_acc2 = evaluate(test_loader, cnn1, cnn2)
        # save results
        mean_pure_ratio1 = sum(pure_ratio_1_list) / len(pure_ratio_1_list)
        mean_pure_ratio2 = sum(pure_ratio_2_list) / len(pure_ratio_2_list)
        print(
            'Epoch [%d/%d] Test Accuracy on the %s test images: Model1 %.4f %% Model2 %.4f %%, Pure Ratio 1 %.4f %%, Pure Ratio 2 %.4f %%'
            % (epoch + 1, args.n_epoch, len(test_dataset), test_acc1,
               test_acc2, mean_pure_ratio1, mean_pure_ratio2))
        with open(txtfile, "a") as myfile:
            myfile.write(
                str(int(epoch)) + ' ' + str(train_acc1) + ' ' +
                str(train_acc2) + ' ' + str(test_acc1) + " " + str(test_acc2) +
                ' ' + str(mean_pure_ratio1) + ' ' + str(mean_pure_ratio2) +
                ' ' + str(rate_schedule[epoch]) + "\n")
if __name__ == '__main__':
    train_filename = "dataset/adult.train.npz"
    test_filename = "dataset/adult.test.npz"
    epochs = 50
    batch_size = 32
    lr = 1e-3
    eval_every = 1

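    # train an MLP with BCEWithLogitsLoss on the adult dataset; report precision/recall/AUC every eval_every epochs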
    train_dataloader = make_dataloader(train_filename,
                                       batch_size=batch_size,
                                       shuffle=True,
                                       drop_last=True)
    test_dataloader = make_dataloader(test_filename,
                                      batch_size=batch_size,
                                      shuffle=False,
                                      drop_last=False)

    mlp = MLP()
    loss = torch.nn.BCEWithLogitsLoss()
    optimizer = optim.SGD(mlp.parameters(), lr)

    for epoch in range(epochs):
        train_loss = train(train_dataloader, mlp, loss, optimizer)
        print(f"epoch: {epoch}, train loss: {train_loss}")

        if epoch % eval_every == 0:
            validate_loss, p, r, auc = validate(test_dataloader, mlp, loss)
            print(
                f"epoch: {epoch}, validate loss: {validate_loss}, precision: {p}, recall: {r}, auc: {auc}"
            )