Example #1
 def test_mnist_noisy_10_split_20(self):
   batch_size = 128
   train_set, _, _ = data_loader.load_dataset('mnist', './data', batch_size=batch_size, shuffle=False, split_seed=42, split=0.2)
 
   train_noisy, _, _ = data_loader.load_dataset('mnist', './data', batch_size=batch_size, shuffle=False, noise=0.1, split_seed=42, split=0.2)
   
   noisy_labels = train_noisy.dataset.targets
   clean_labels = train_set.dataset.targets
   
   diff = (np.array(clean_labels) != np.array(noisy_labels)).sum() / (len(clean_labels) * 0.8)
   assert diff > 0.09
Example #2
 def test_mnist_noisy_0(self):
   batch_size = 128
   train_set, _, _ = data_loader.load_dataset('mnist', './data', batch_size=batch_size, shuffle=False)
 
   train_noisy, _, _ = data_loader.load_dataset('mnist', './data', batch_size=batch_size, shuffle=False, noise=0.)
   
   noisy_labels = train_noisy.dataset.targets
   
   clean_labels = train_set.dataset.targets
   
   diff = (np.array(clean_labels) != np.array(noisy_labels)).sum()
   assert diff == 0.
Example #3
 def test_cifar10_noisy_10(self):
   batch_size = 128
   train_set, _, _ = data_loader.load_dataset('cifar10', './data', batch_size=batch_size, shuffle=False)
 
   train_noisy, _, _ = data_loader.load_dataset('cifar10', './data', batch_size=batch_size, shuffle=False, noise=0.1)
   
   noisy_labels = train_noisy.dataset.targets
   
   clean_labels = train_set.dataset.targets
   
   diff = (np.array(clean_labels) != np.array(noisy_labels)).sum() / len(clean_labels)
   assert diff > 0.09
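A note on the three noise tests above: they only pin down the noise argument indirectly (noise=0.1 changes roughly 10% of the training labels, noise=0. changes none). The sketch below shows one kind of label corruption consistent with those assertions; add_label_noise is a hypothetical helper written for illustration, not the actual data_loader code, which may differ in detail.

import numpy as np

def add_label_noise(targets, noise, num_classes=10, seed=42):
    # Hypothetical helper (an assumption, not the real loader): reassign a
    # `noise` fraction of labels to a *different* class, so the observed
    # mismatch rate is close to `noise`, matching the diff > 0.09 asserts.
    rng = np.random.default_rng(seed)
    targets = np.array(targets)
    n_noisy = int(len(targets) * noise)
    idx = rng.choice(len(targets), size=n_noisy, replace=False)
    offsets = rng.integers(1, num_classes, size=n_noisy)
    targets[idx] = (targets[idx] + offsets) % num_classes
    return targets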
Example #4
def model_test(net, batch_size=2):
    
    x_tensor, y_tensor, m_tensor = load_dataset(mode='test', resize=True, resize_shape=(256, 256))
    num_samples = x_tensor.shape[0]
    print("[+] ====== Start test... ======")
    num_iters = int(np.ceil(num_samples / batch_size))
    for ite in range(num_iters):
        print("[*] predicting on the {}th batch".format(ite + 1))
        if not ite == num_iters - 1:
            start_id, end_id = ite * batch_size, (ite + 1) * batch_size
            bat_img = torch.Tensor(x_tensor[start_id : end_id, :, :, :])
            bat_label = torch.Tensor(y_tensor[start_id : end_id, 0: 1, :, :])
            #bat_mask_2ch = torch.Tensor(m_tensor[start_id : end_id, :, :, :])
            bat_mask = torch.Tensor(m_tensor[start_id : end_id, 0: 1, :, :])
        else:
            start_id = ite * batch_size
            bat_img = torch.Tensor(x_tensor[start_id : , :, :, :])
            bat_label = torch.Tensor(y_tensor[start_id : , 0: 1, :, :])
            #bat_mask_2ch = torch.Tensor(m_tensor[start_id : end_id, :, :, :])
            bat_mask = torch.Tensor(m_tensor[start_id : , 0: 1, :, :])
        bat_pred = net(bat_img)
        bat_pred_class = (bat_pred > 0.5).float() * bat_mask
        eval_print_metrics(bat_label, bat_pred, bat_mask)
        # plt.imshow(bat_pred[0,0,:,:].detach().numpy(), cmap='jet')#, vmin=0, vmax=1)
        # plt.colorbar()
        # plt.show()
        #bat_pred_class = bat_pred.detach() * bat_mask
        paste_and_save(bat_img, bat_label, bat_pred_class, batch_size, ite + 1)

    return
Example #5
def main():

    train_data, val_data, train_label, val_label, tokenizer = load_dataset(
        'train',
        FLAGS.dev_sample_percentage,
        FLAGS.max_len,
    )

    if FLAGS.optimizer == 'adam':
        optimizer = tf.keras.optimizers.Adam()
    elif FLAGS.optimizer == 'sgd':
        optimizer = tf.keras.optimizers.SGD()


#    char_cnn = CharCNN(tokenizer, len(train_label[0]),  FLAGS.dropout_prob)

    char_cnn = create_model(tokenizer, len(train_label[0]), FLAGS.dropout_prob,
                            FLAGS.max_len)

    char_cnn.compile(optimizer=optimizer,
                     loss=tf.keras.losses.categorical_crossentropy,
                     metrics=['accuracy'])
    char_cnn.summary()

    callbacks_list = [save()]

    char_cnn.fit(train_data,
                 train_label,
                 batch_size=FLAGS.batch_size,
                 epochs=FLAGS.num_epochs,
                 validation_data=(val_data, val_label),
                 callbacks=callbacks_list)
Example #6
def search_learning_rates():
    learning_rates = [0.01, 0.001, 0.0001]
    models = {}

    train_set_x_orig, train_set_y, test_set_x_orig, test_set_y, classes = data_loader.load_dataset(
    )
    train_set_x, test_set_x = data_loader.preprocess_dataset(
        train_set_x_orig, test_set_x_orig)

    for i in learning_rates:
        print("learning rate is: " + str(i))
        models[str(i)] = model(train_set_x,
                               train_set_y,
                               test_set_x,
                               test_set_y,
                               num_iterations=1500,
                               learning_rate=i,
                               print_cost=False)
        print('\n' +
              "-------------------------------------------------------" + '\n')

    for i in learning_rates:
        pyplot.plot(numpy.squeeze(models[str(i)]["costs"]),
                    label=str(models[str(i)]["learning_rate"]))

    pyplot.ylabel('cost')
    pyplot.xlabel('iterations (hundreds)')

    legend = pyplot.legend(loc='upper center', shadow=True)
    frame = legend.get_frame()
    frame.set_facecolor('0.90')
    pyplot.show()
Example #7
def main():
    if not torch.cuda.is_available():
        logger.info("no gpu device available")
        sys.exit(1)

    logger.info("*** Begin {} ***".format(config.stage))

    # set default gpu device
    torch.cuda.set_device(config.gpus[0])

    # set random seed
    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed_all(config.seed)

    torch.backends.cudnn.benchmark = True

    # get data with meta info
    logger.info("preparing data...")
    input_size, channels_in, num_classes, train_data, valid_data = \
        load_dataset(dataset=config.dataset,
                     data_dir=config.data_dir,
                     cutout_length=0,
                     validation=True,
                     auto_aug=config.auto_aug)

    train_loader = torch.utils.data.DataLoader(
        dataset=train_data,
        batch_size=config.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(
            list(range(config.total_samples))),
        num_workers=config.num_workers,
        pin_memory=True)

    valid_loader = torch.utils.data.DataLoader(
        dataset=valid_data,
        batch_size=config.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(
            list(range(config.total_samples))),
        num_workers=config.num_workers,
        pin_memory=True)

    logger.info("loading model...")
    model = utils.load_checkpoint(config.model_dir)
    model = model.to(device)
    model.eval()

    config.num_cells = len(model._cells)
    config.num_nodes = len(model._cells[0]._dag)

    logger.info("start computing...")
    #compute_offline(train_loader, model, config.feature_dir)
    compute_online(train_loader, model, config.feature_dir)

    logger.info("*** Finish {} ***".format(config.stage))
Example #8
    def search_from_path(self, query_path, verification=True):
        # 3.1 inference query images to descriptors (infer_image_to_des)
        self.query_image_paths, self.query_image_labels = load_dataset(
            query_path, no_label=True)

        self.query_result = self.infer_image_to_des(
            self.query_image_paths, self.query_image_labels
        )  # result['locations'], result['descriptors']
        query_des_np = np.concatenate(np.asarray(
            self.query_result['descriptors']),
                                      axis=0)
        # index table for query set
        self.query_des_from_img, self.query_img_from_des = make_index_table(
            self.query_result['descriptors'])
        query_img_idx = list(self.query_des_from_img.keys())

        # 3.2 pq search
        k = 60  # k nearest neighbors

        _, query_des2desList = self.pq.search(query_des_np, k)

        # 3.3 find similar image list by frequency score (get_similar_img(mode='frequency', searched_des))

        query_des2imgList = {}

        # iterate over the query images' inferred descriptors
        for img_i, des_list in enumerate(query_des2desList):
            # map inferred descriptors to their parent image index
            query_des2imgList[img_i] = [
                self.img_from_des[des_i] for des_i in des_list
            ]
        """
            query_des2imgList = {
                image_index: [list of image index of each descriptor]}
        """

        query_img2imgFreq = self.get_similar_img(query_des2imgList)
        self.result = query_img2imgFreq

        # 3.4 verification by ransac (rerank)
        if verification:
            query_inlier_rank = self.get_ransac_result(query_img2imgFreq)
            self.result = query_inlier_rank


        # 3.5 index to image path
        for query_i in self.result:
            top_k_img_i_list = self.result[query_i]['index']
            top_k_img_path = [
                self.db_image_paths[img_i] for img_i in top_k_img_i_list
            ]
            self.result[query_i]['path'] = top_k_img_path

        return self.result
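Step 3.3 above calls get_similar_img, which is not shown in this example. The sketch below is one plausible frequency-based ranking consistent with how the result is consumed later; get_similar_img_sketch and its 'score' field are illustrative names only, and the real method may weight or normalise matches differently.

from collections import Counter

def get_similar_img_sketch(query_des2imgList, top_k=10):
    # Hypothetical stand-in for get_similar_img: rank DB images by how many
    # of a query image's matched descriptors fall into each of them.
    result = {}
    for query_i, img_list in query_des2imgList.items():
        counts = Counter(img_list).most_common(top_k)
        result[query_i] = {
            'index': [img_i for img_i, freq in counts],
            'score': [freq for img_i, freq in counts],
        }
    return result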
Example #9
File: test.py Project: susan0199/StacNAS
def main():
    if not torch.cuda.is_available():
        logger.info("no gpu device available")
        sys.exit(1)

    logger.info("*** Begin {} ***".format(config.stage))

    # set default gpu device
    torch.cuda.set_device(config.gpus[0])

    # set random seed
    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed_all(config.seed)

    torch.backends.cudnn.benchmark = True

    # get data with meta info
    logger.info("preparing data...")
    input_size, channels_in, num_classes, train_data, valid_data = \
        load_dataset(dataset=config.dataset,
                     data_dir=config.data_dir,
                     cutout_length=config.cutout_length,
                     validation=True,
                     auto_aug=config.auto_aug)

    valid_loader = torch.utils.data.DataLoader(dataset=valid_data,
                                               batch_size=config.batch_size,
                                               shuffle=False,
                                               num_workers=config.num_workers,
                                               pin_memory=True)

    logger.info("loading model...")
    if config.load_model_dir is not None:
        model = torch.load(config.load_model_dir)
    else:
        model = utils.load_checkpoint(config.model_dir)
    model = model.to(device)

    model_size = utils.param_size(model)
    logger.info("model_size: {:.3f} MB".format(model_size))

    if config.label_smooth > 0:
        criterion = utils.CrossEntropyLabelSmooth(num_classes,
                                                  config.label_smooth)
    else:
        criterion = nn.CrossEntropyLoss()
    criterion = criterion.to(device)

    logger.info("start testing...")
    best_top1 = test(valid_loader, model, criterion)

    logger.info("Final Prec@1: {:.4%}".format(best_top1))
    logger.info("*** Finish {} ***".format(config.stage))
Example #10
def main(dataset_path, seed):

    expert_user_trajectory = load_dataset(dataset_path)

    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    model = GANModel(expert_user_trajectory)
    Reward = model.train()

    with open("./hyper_batch_256_D1r_0.0005_5000.json", "w") as f:
        json.dump(Reward, f)
Example #11
def main(args):
    # Create model directory
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)

    # Build data loader
    dataset, targets = load_dataset()

    # Build the models
    mlp = MLP(args.input_size, args.output_size)

    load_model = True
    if load_model:
        mlp.load_state_dict(torch.load('../models/cae_encoder.pkl'))

    if torch.cuda.is_available():
        mlp.cuda()

    # Loss and Optimizer
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adagrad(mlp.parameters())

    # Train the Models
    total_loss = []
    print(len(dataset))
    print(len(targets))
    sm = 10  # start saving models from epoch 10
    for epoch in range(args.num_epochs):
        print("epoch" + str(epoch))
        avg_loss = 0
        for i in range(0, len(dataset), args.batch_size):
            # Forward, Backward and Optimize
            mlp.zero_grad()
            bi, bt = get_input(i, dataset, targets, args.batch_size)
            bi = to_var(bi)
            bt = to_var(bt)
            bo = mlp(bi)
            loss = criterion(bo, bt)
            avg_loss = avg_loss + loss.data.item()
            loss.backward()
            optimizer.step()
        print("--average loss:")
        print(avg_loss / (len(dataset) / args.batch_size))
        total_loss.append(avg_loss / (len(dataset) / args.batch_size))
        # Save the models
        if epoch == sm:
            model_path = 'mlp_100_4000_PReLU_ae_dd' + str(sm) + '.pkl'
            torch.save(mlp.state_dict(),
                       os.path.join(args.model_path, model_path))
            sm = sm + 50  # then save the model every 50 epochs onwards
    torch.save(total_loss, 'total_loss.dat')
    model_path = 'mlp_100_4000_PReLU_ae_dd_final.pkl'
    torch.save(mlp.state_dict(), os.path.join(args.model_path, model_path))
Example #12
def main(config):

    # ensure reproducibility
    torch.manual_seed(config.random_seed)
    kwargs = {}
    if config.cuda:
        torch.cuda.manual_seed(config.random_seed)
        kwargs = {'num_workers': 1, 'pin_memory': True}

    scores = []
    # instantiate data loaders
    count = 0
    times = []

    for i in [1, 2, 3]:
        start = time.time()
        count = i
        train_data, test_data = load_dataset(config.data_dir, str(count))
        # instantiate data loaders

        data_loader = get_train_valid_loader(train_data, config.batch_size,
                                             config.random_seed,
                                             config.valid_size, config.shuffle,
                                             config.show_sample, **kwargs)

        test_loader = get_test_loader(test_data, config.batch_size, **kwargs)

        # instantiate trainer
        trainer = Trainer(config, count, data_loader, test_loader)

        trainer.train()
        result = trainer.test()

        scores.append(result)
        elapsed = time.time() - start
        times.append(elapsed)

    scores = np.array(scores)
    times = np.array(times)
    print('>>> scores:', scores)
    print('aver time', times.mean())
    # print('avg\tacc\tf1\tprec\trec\tauc')
    print('acc:',
          scores.mean(axis=0)[0], '\nf1',
          scores.mean(axis=0)[1], '\nprec',
          scores.mean(axis=0)[2], '\nrec',
          scores.mean(axis=0)[3])
    print('>>> std')
    print('acc:',
          scores.std(axis=0)[0], '\nf1',
          scores.std(axis=0)[1], '\nprec',
          scores.std(axis=0)[2], '\nrec',
          scores.std(axis=0)[3])
Example #13
 def test_mnist_shuffle_split_0(self):
     batch_size = 100
     train_loader, test_loader, val_loader = data_loader.load_dataset(
         'mnist',
         './data',
         batch_size=batch_size,
         shuffle=True,
         augmentation=False,
         noise=0.,
         split_seed=42,
         split=0.)
     assert val_loader is None
     assert len(train_loader) == 600
Example #14
 def test_cifar10_shuffle_split_99(self):
     batch_size = 100
     train_loader, test_loader, val_loader = data_loader.load_dataset(
         'cifar10',
         './data',
         batch_size=batch_size,
         shuffle=True,
         augmentation=False,
         noise=0.,
         split_seed=42,
         split=0.99)
     assert len(val_loader) == 495
     assert len(train_loader) == 5
Example #15
 def test_mnist_split_40(self):
     batch_size = 100
     train_loader, test_loader, val_loader = data_loader.load_dataset(
         'mnist',
         './data',
         batch_size=batch_size,
         shuffle=False,
         augmentation=False,
         noise=0.,
         split_seed=42,
         split=0.4)
     assert len(val_loader) == 240
     assert len(train_loader) == 360
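The loader lengths asserted in the three split tests above follow from the standard training-set sizes (60,000 images for MNIST, 50,000 for CIFAR-10) and batch_size=100, assuming split is the fraction carved out of the training set for the validation loader. A quick back-of-the-envelope check:

import math

def expected_loader_lengths(n_train, batch_size, split):
    # Assumes a DataLoader over n samples has ceil(n / batch_size) batches
    # and that the validation split is taken from the training set.
    n_val = int(n_train * split)
    n_tr = n_train - n_val
    return math.ceil(n_tr / batch_size), math.ceil(n_val / batch_size)

print(expected_loader_lengths(60000, 100, 0.0))   # (600, 0)   -> MNIST, split=0.
print(expected_loader_lengths(50000, 100, 0.99))  # (5, 495)   -> CIFAR-10, split=0.99
print(expected_loader_lengths(60000, 100, 0.4))   # (360, 240) -> MNIST, split=0.4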
Example #16
def main():
    train_data, val_data, train_label, val_label, tokenizer = load_dataset() 
    print('len(train_label[0]):', len(train_label[0]))
    char_cnn = CharCNN(tokenizer, len(train_label[0]))

    inputs = Input(shape=(1014,), name='input')
    outputs = char_cnn(inputs)

    model = Model(inputs=inputs, outputs=outputs)

    model.compile(optimizer=tf.keras.optimizers.Adam(),
                  loss=tf.keras.losses.categorical_crossentropy,
                  metrics=['accuracy'])
    model.fit(train_data, train_label, batch_size=256, epochs=5, validation_data=(val_data,val_label))
Example #17
def main(load_embedding=False, max_runs=5, batch_size=1):
    sents, labels, n_classes = utils.load_imdb_dataset()
    # sents, labels, n_classes = utils.load_rotten_tomatoes_dataset()
    print(f'distribution of classes: {Counter(labels)}')
    tokenized_sents = utils.tokenize(sents)
    tokenized_sents, labels = utils.remove_empty_tokenizedsents(
        tokenized_sents, labels)
    if load_embedding:
        embeddings, word_id_map = utils.load_glove_embedding_map(
            torch_flag=True)
        print('loaded pretrained embedding of shape:', embeddings.shape)
    else:
        print('NO pretrained embedding loaded')
        word_id_map = utils.get_word_id_map(enrich_vocab=True)
        embeddings = None

    sents_as_ids = utils.convert_word_to_id(tokenized_sents, word_id_map)
    # sents_as_ids = [utils.pad_token_ids(s,word_id_map['PAD'],max_len=30) for s in sents_as_ids]

    accs = []
    f_scores = []
    for run in range(1, max_runs + 1):
        train_sent_ids, test_sent_ids, y_train, y_test = train_test_split(
            sents_as_ids, labels, test_size=0.3, random_state=randint(1, 100))
        dataset = data_loader.load_dataset(train_sent_ids, y_train, device)
        train_loader = DataLoader(dataset,
                                  batch_size=batch_size,
                                  shuffle=True,
                                  num_workers=0)
        print(
            f'sample lengths - train: {len(y_train)} and test: {len(y_test)}')

        model = train(train_loader,
                      embeddings,
                      n_epochs=20,
                      vocab_size=len(word_id_map),
                      lr=0.05,
                      use_pretrained_wv=load_embedding,
                      n_classes=n_classes)
        try:
            a, f = test_batch(model, test_sent_ids, y_test)
        except:
            a, f = test_stochastic(model, test_sent_ids, y_test)
        accs.append(a)
        f_scores.append(f)
        print(f'run: {run}, acc: {a}, f1: {f}')
    print(
        f'average of {max_runs} runs acc: {sum(accs)/len(accs)}, f1: {sum(f_scores)/len(f_scores)}'
    )
Example #18
def subtype_select(subtype):
    """
    this function returns Features (X) and labels (y) representing subtype.
    """
    X = data_loader.load_dataset()
    y = data_loader.load_target()

    X = X.subtract(X.mean())

    subtypes = ['HER2+', 'HR+', 'Triple Neg']
    subtypes.remove(subtype)

    y = y.replace(subtype, 1)
    y = y.replace(subtypes, 0)

    return X, y
Example #19
def select_subtype(subtype):
    """
    this function returns Features (X) and labels (y) representing subtype.
    """
    X = data_loader.load_dataset()
    y = data_loader.load_target()

    # apply mean centering to for each region
    X = X.subtract(X.mean())

    # all subtypes
    subtypes = ['HER2+', 'HR+', 'Triple Neg']
    # remove the current subtype from the whole subtype set
    subtypes.remove(subtype)

    # re-coding the subtypes to 0 and 1
    y = y.replace(subtype, 1)
    y = y.replace(subtypes, 0)

    return X, y
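A short usage sketch for select_subtype, assuming X is a pandas DataFrame and y a Series (as the .subtract/.replace calls suggest) and that scikit-learn is available; the classifier choice here is illustrative, not part of the original project:

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score

# One-vs-rest framing per subtype: 1 = the chosen subtype, 0 = the others.
X, y = select_subtype('HER2+')
clf = LogisticRegression(max_iter=1000)
print(cross_val_score(clf, X, y, cv=5).mean())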
Example #20
    def attach_db_from_path(self,
                            db_path,
                            ignore_cache=False,
                            cache_path='result_cache_hub.joblib',
                            filename_path='filename_path.joblib'):

        # 2.1
        self.db_image_paths, self.db_image_labels = load_dataset(db_path)
        # ignore cache loading & execute inference
        if ignore_cache or not os.path.exists(cache_path):

            self.db_result = self.infer_image_to_des(
                self.db_image_paths, self.db_image_labels
            )  # result['locations'], result['descriptors']
            # cache save
            with open(cache_path, 'wb') as f:
                joblib.dump(self.db_result, f)
#             with open(filename_path, 'wb') as f:
#                 joblib.dump(self.db_image_paths, f)

# cache file exists: load cached inference results
        else:
            print("no inference on db")
            with open(cache_path, 'rb') as f:
                self.db_result = joblib.load(f)
#             with open(filename_path, 'rb') as f:
#                 self.db_image_paths = joblib.load(f)
#         for i in range(20):
#             print('{}th descriptors num: {}'.format(i, self.db_result['descriptors'][i].shape))

# 2.2
        self.des_from_img, self.img_from_des = make_index_table(
            self.db_result['descriptors'])

        # 2.3
        descriptors_np = np.concatenate(np.asarray(
            self.db_result['descriptors']),
                                        axis=0)
        if not self.pq.is_trained:
            self.pq.train(descriptors_np)
        self.pq.add(descriptors_np)
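Both this example and Example #8 rely on make_index_table, which is not shown. The sketch below is one plausible implementation consistent with how des_from_img and img_from_des are used above; it is an assumption, and the real helper may differ.

def make_index_table_sketch(descriptors_per_image):
    # Hypothetical version of make_index_table: map each image index to the
    # rows its descriptors occupy in the concatenated descriptor matrix,
    # and each global descriptor row back to its parent image.
    des_from_img, img_from_des = {}, {}
    offset = 0
    for img_i, des in enumerate(descriptors_per_image):
        des_from_img[img_i] = list(range(offset, offset + len(des)))
        for des_i in des_from_img[img_i]:
            img_from_des[des_i] = img_i
        offset += len(des)
    return des_from_img, img_from_des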
Example #21
def test():

    if FLAGS.optimizer == 'adam':
        optimizer = tf.keras.optimizers.Adam()
    elif FLAGS.optimizer == 'sgd':
        optimizer = tf.keras.optimizers.SGD()

    test_data, test_label, tokenizer = load_dataset('test', max_len=MAX_LEN)

    char_cnn = create_model(tokenizer, len(test_label[0]), 0, max_len=MAX_LEN)

    if os.path.isfile(FLAGS.weights_path):
        #char_cnn = load_model(FLAGS.weights_path)
        char_cnn.load_weights(FLAGS.weights_path)

    char_cnn.compile(optimizer=optimizer,
                     loss=tf.keras.losses.categorical_crossentropy,
                     metrics=['accuracy'])

    output = char_cnn.evaluate(test_data, test_label)
    print(output)
Example #22
def get_cluster_index(function_name, dataset_name):
    """
    Get a cluster evaluation function for a specific dataset.

    Parameters
    ----------
    function_name: string
    dataset_name: string

    Returns
    -------
    eval_func: 1d function
        Index for evaluating a clustering final partition.
    task: 'min' or 'max'
        Type of problem (minimization or maximization)
    """
    X, y = data_loader.load_dataset(dataset_name)
    if function_name == 'davies_bouldin':
        return davies_bouldin(X, y), 'min'
    else:
        return xie_beni(X, y), 'min'
Example #23
File: main.py Project: linmengsysu/IsoNN
def main(config):

    # ensure reproducibility
    torch.manual_seed(config.random_seed)

    scores = []
    # instantiate data loaders
    count = 0
    times = []

    for i in range(1, 4):
        start = time.time()
        count = i
        train_data, test_data = load_dataset(config.data_dir, str(count))
        # instantiate data loaders
        data_loader = get_train_loader(train_data, config.batch_size,
                                       config.random_seed, config.shuffle)

        test_loader = get_test_loader(test_data, config.batch_size)

        # instantiate trainer
        trainer = Trainer(config, count, data_loader, test_loader)

        trainer.train()
        result = trainer.test()

        scores.append(result)
        elapsed = time.time() - start
        times.append(elapsed)

    scores = np.array(scores)
    times = np.array(times)
    print('aver time', times.mean())
    # print('avg\tacc\tf1\tprec\trec\tauc')
    print('acc:',
          scores.mean(axis=0)[0], '\nf1',
          scores.mean(axis=0)[1], '\nprec',
          scores.mean(axis=0)[2], '\nrec',
          scores.mean(axis=0)[3])
Example #24
def train(dataset_path, dim_seed, batch_size, lr_g, lr_d, alpha, beta, seed):
    """
    Train GAN for generating real user features
    """
    dim_user = 70
    dim_seed = dim_seed
    expert_user_features = load_dataset(dataset_path)

    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    model = GanModel(dim_user,
                     dim_seed,
                     lr_g,
                     lr_d,
                     expert_user_features,
                     batch_size=batch_size,
                     alpha=alpha,
                     beta=beta).to(device)
    WL = model.train()
    model.save_model()

    state = {"WL": WL}
    torch.save(state, "./hyper_batch_256_G1r_0.0005_Dlr_0.0001.pth")
Example #25
# parser.add_argument("--batch_size", type=int, default=64, help="Batch Size")
# parser.add_argument("--num_epochs", type=int, default=50, help="Number of training epochs")
# parser.add_argument("--evaluate_every", type=int, default=2, help="Evaluate model on dev set after this many steps (default: 50)")
# parser.add_argument("--moving_average", type=bool, default=True, help="Enable usage of Exponential Moving Average")

# args = parser.parse_args()
# # try:
# #     args = parser.parse_args()
# #     print(args)
# # except:
# #     return 1

# In[4]:

#Loading Data
train_data, train_label, test_data, test_label, bgr_mean = data_loader.load_dataset(
    face_dataset, 1)
bgr_mean = [round(x, 4) for x in bgr_mean]

# In[5]:

acc_list = [0]
loss_train_list = [0]
loss_test_list = [0]

sess = tf.Session()

# In[6]:

bgr_mean = [93.5940, 104.7624, 129.1863]
cnn = VGGFace(bgr_mean, vgg_weights, num_classes=8, weight_decay=5e-4)
vgg_known_acc_max = [0.65, 0.51, 0.59, 0.49, 0.59]
Example #26
from data_loader import load_dataset
from attacks import create_attack
from configs import path_fig, epsilon, eps_step, path
from utils import get_acc_preds, plot_attacks_acc, plot_compare_acc, __resize_array_images
from models.models import medical_vgg_model
from tensorflow.keras.optimizers import SGD
from sklearn.metrics import accuracy_score, roc_auc_score

# query user for dataset
dataset = input(
    'Enter dataset to be used (brain_mri, mnist, cifar, ddsm, lidc)\n')

# load in dataset
# lidc, ddsm augment
if dataset == 'ddsm' or dataset == 'lidc':
    x_train, y_train, x_test, y_test = load_dataset(dataset, path, aug=True)
else:
    x_train, y_train, x_test, y_test = load_dataset(dataset, path, aug=False)
# query user for adversarial attack to use for generating adversarial test set
attack_type = input('Enter attack to be used (fgsm, pgd, bim)\n')
if (attack_type != 'fgsm') & (attack_type != 'pgd') & (
        attack_type != 'bim') & (attack_type != 'jsma'):
    print('attack type not supported\n')
    exit(0)

# verify that x_train, y_train, x_test, y_test have the correct dimensions
print('x_train shape: ', x_train.shape)
print('y_train.shape', y_train.shape)
print('x_test.shape', x_test.shape)
print('y_test.shape', y_test.shape)
Example #27
def train():
    print("Loading training data...")
    # Get sentences to tensors
    input_tensor, target_tensor, inp_lang, targ_lang = load_dataset(
        PATH_TO_FILE, num_examples=None)

    # NNConfig
    vocab_inp_size = len(inp_lang.word_index) + 1
    vocab_targ_size = len(targ_lang.word_index) + 1
    config = NNConfig(vocab_inp_size, vocab_targ_size)

    # Save i2w file for test and translate
    save_index2word(inp_lang, "./dataset/input_dict.txt")
    save_index2word(targ_lang, "./dataset/target_dict.txt")
    save_max_length(input_tensor, target_tensor, vocab_inp_size,
                    vocab_targ_size, "./dataset/max_len.txt")

    # Set up the training data batches
    BUFFER_SIZE = len(input_tensor)
    steps_per_epoch = len(input_tensor) // config.BATCH_SIZE

    dataset = tf.data.Dataset.from_tensor_slices(
        (input_tensor, target_tensor)).shuffle(BUFFER_SIZE)
    dataset = dataset.batch(config.BATCH_SIZE, drop_remainder=True)

    print("Setting Seq2Seq model...")
    # Setup the NN Structure
    encoder = Encoder(config.VOCAB_INP_SIZE, config.EMBEDDING_DIM,
                      config.UNITS, config.BATCH_SIZE)
    decoder = Decoder(config.VOCAB_TARG_SIZE, config.EMBEDDING_DIM,
                      config.UNITS, config.BATCH_SIZE)
    # Setup optimizer
    optimizer = tf.keras.optimizers.Adam()
    loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
        from_logits=True, reduction='none')
    # Setup Checkpoint
    checkpoint = tf.train.Checkpoint(optimizer=optimizer,
                                     encoder=encoder,
                                     decoder=decoder)

    def loss_function(real, pred):
        mask = tf.math.logical_not(tf.math.equal(real, 0))
        loss_ = loss_object(real, pred)
        mask = tf.cast(mask, dtype=loss_.dtype)
        loss_ *= mask
        return tf.reduce_mean(loss_)

    @tf.function
    def train_step(inp, targ, enc_hidden, optimizer):
        loss = 0
        with tf.GradientTape() as tape:
            enc_output, enc_hidden = encoder(inp, enc_hidden)
            dec_hidden = enc_hidden
            dec_input = tf.expand_dims([targ_lang.word_index['<start>']] *
                                       config.BATCH_SIZE, 1)

            # Teacher forcing - feeding the target as the next input
            for t in range(1, targ.shape[1]):
                # passing enc_output to the decoder
                predictions, dec_hidden, _ = decoder(dec_input, dec_hidden,
                                                     enc_output)
                loss += loss_function(targ[:, t], predictions)
                # using teacher forcing
                dec_input = tf.expand_dims(targ[:, t], 1)

            batch_loss = (loss / int(targ.shape[1]))
            variables = encoder.trainable_variables + decoder.trainable_variables
            gradients = tape.gradient(loss, variables)
            optimizer.apply_gradients(zip(gradients, variables))
        return batch_loss

    print("Start training ...")
    for epoch in range(config.EPOCHS):

        start = time.time()
        enc_hidden = encoder.initialize_hidden_state()

        total_loss = 0

        for (batch, (inp, targ)) in enumerate(dataset.take(steps_per_epoch)):

            batch_loss = train_step(inp, targ, enc_hidden, optimizer)
            total_loss += batch_loss

            if batch % 100 == 0:
                print('Epoch {} Batch {} Loss {:.4f}'.format(
                    epoch + 1, batch, batch_loss.numpy()))

        # saving (checkpoint) the model every 2 epochs
        if (epoch + 1) % 2 == 0:
            checkpoint.save(file_prefix=SAVE_PATH)

        print('Epoch {} Loss {:.4f}'.format(epoch + 1,
                                            total_loss / steps_per_epoch))
        print('Time taken for 1 epoch {} sec\n'.format(time.time() - start))
Example #28
def compute_predictions():
    RMSE = np.zeros((len(datasets), len(training_schemes), num_trials))
    NLL = np.zeros((len(datasets), len(training_schemes), num_trials))
    df_pred_uci = pd.DataFrame(
        columns=["Dataset", "Method", "Target", "Mu", "Sigma"])
    for di, dataset in enumerate(datasets):
        for ti, trainer_obj in enumerate(training_schemes):
            for n in range(num_trials):
                (x_train,
                 y_train), (x_test,
                            y_test), y_scale = data_loader.load_dataset(
                                dataset, return_as_tensor=False)
                batch_size = h_params[dataset]["batch_size"]
                num_iterations = num_epochs * x_train.shape[0] // batch_size
                print("Num of iterations :", num_iterations)
                done = False
                while not done:
                    with tf.device(dev):
                        model_generator = models.get_correct_model(
                            dataset="toy", trainer=trainer_obj)
                        model, opts = model_generator.create(
                            input_shape=x_train.shape[1:])
                        if method_names[
                                ti] == "Laplace":  # training scheme is Laplace likelihood (2nd in the list)
                            print("Training Laplace likelihood")
                            trainer = trainer_obj(
                                model,
                                opts,
                                "laplace",
                                dataset,
                                learning_rate=h_params[dataset]
                                ["learning_rate"])
                        elif method_names[ti] == "Gaussian":
                            print("Training Gaussian likelihood")
                            trainer = trainer_obj(
                                model,
                                opts,
                                "gaussian",
                                dataset,
                                learning_rate=h_params[dataset]
                                ["learning_rate"])
                        else:
                            trainer = trainer_obj(
                                model,
                                opts,
                                dataset,
                                learning_rate=h_params[dataset]
                                ["learning_rate"])
                        model, rmse, nll = trainer.train(x_train,
                                                         y_train,
                                                         x_test,
                                                         y_test,
                                                         y_scale,
                                                         iters=num_iterations,
                                                         batch_size=batch_size,
                                                         verbose=True)

                        #Compute on validation data and save predictions
                        summary_to_add = get_prediction_summary(
                            dataset, method_names[ti], model, x_test, y_test)
                        df_pred_uci = df_pred_uci.append(summary_to_add,
                                                         ignore_index=True)

                        del model
                        tf.keras.backend.clear_session()
                        done = False if np.isinf(nll) or np.isnan(
                            nll) else True
                print("saving {} {}".format(rmse, nll))
                RMSE[di, ti, n] = rmse
                NLL[di, ti, n] = nll

    RESULTS = np.hstack((RMSE, NLL))
    mu = RESULTS.mean(axis=-1)
    error = np.std(RESULTS, axis=-1)

    print("==========================")
    print("[{}]: {} pm {}".format(dataset, mu, error))
    print("==========================")

    print("TRAINERS: {}\nDATASETS: {}".format(
        [trainer.__name__ for trainer in training_schemes], datasets))
    print("MEAN: \n{}".format(mu))
    print("ERROR: \n{}".format(error))

    return df_pred_uci
Example #29
File: main.py Project: wandercap/lstm
import os
import time
import data_loader
import itertools
from torchtext import data
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import torch.optim as optim
import numpy as np
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
import matplotlib.pyplot as plt
from lstm import LSTMClassifier

TEXT, LABEL, label_size, vocab_size, word_embeddings, train_iter, valid_iter, test_iter = data_loader.load_dataset(
)


def clip_gradient(model, clip_value):
    params = list(filter(lambda p: p.grad is not None, model.parameters()))
    for p in params:
        p.grad.data.clamp_(-clip_value, clip_value)


def plot_confusion_matrix(cm,
                          classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    This function prints and plots the confusion matrix.
Example #30
if __name__ == '__main__':
    import numpy as np
    import pandas as pd
    import time
    from sklearn.model_selection import train_test_split
    from sklearn.linear_model import LinearRegression
    from sklearn.tree import DecisionTreeClassifier

    from private_tree.tree import DP_Random_Forest
    from private_tree.dp_datagen import (generate_data, generate_partition)

    from data_loader import (load_dataset, allocate_data_to_agents)
    from evaluations.eval_utils import (plot_results, save_data, eval,
                                        as_dataframe)

    data, x, y, p, c = load_dataset('census', 'education-num')
    data = data[0:10]
    ## todo: just avoid counts < 5 or so (prune branch)
    print(data)

    mdl = DP_Random_Forest(train=data.values,
                           categs=[],
                           num_trees=1,
                           max_tree_depth=10,
                           seed=1)
    mdl.fit(train=data.values, eps=1.0)
    mdl.predict(test=data.values)
    # Agent Generate private (unlabeled) data
    partition = generate_partition(mdl._trees[0], data, x)
    for p in partition:
        print(p)