def _build_data_loaders(self):
        transforms = self._get_transforms()

        # DEFINE: DATASETS
        train_dataset = Dataset(
            data_dir=self.data_dir,
            transforms=transforms,
            load_files=self.data_config["load_files"],
        )

        test_dataset = Dataset(
            data_dir=self.test_data_dir,
            transforms=transforms,
            load_files=self.data_config["load_files"],
        )

        # DEFINE: DATA LOADER
        self.train_data_loader = DataLoader(
            dataset=train_dataset,
            batch_size=self.model_config["batch_size"],
            shuffle=True,
            pin_memory=True,
            num_workers=self.model_config["batch_size"] // 2,
        )

        self.test_data_loader = DataLoader(
            dataset=test_dataset,
            batch_size=self.model_config["batch_size"],
            shuffle=True,
            pin_memory=True,
            num_workers=self.model_config["batch_size"] // 2,
        )

        self.test_data_loader_ = iter(self.test_data_loader)
Example #2
def test(**kwargs):
    opt.parse(kwargs)

    if opt.device is not None:
        opt.device = torch.device(opt.device)
    elif opt.gpus:
        opt.device = torch.device(0)
    else:
        opt.device = torch.device('cpu')

    pretrain_model = load_pretrain_model(opt.pretrain_model_path)

    generator = GEN(opt.dropout,
                    opt.image_dim,
                    opt.text_dim,
                    opt.hidden_dim,
                    opt.bit,
                    pretrain_model=pretrain_model).to(opt.device)

    path = 'checkpoints/' + opt.dataset + '_' + str(opt.bit)
    load_model(generator, path)

    generator.eval()

    images, tags, labels = load_data(opt.data_path, opt.dataset)

    i_query_data = Dataset(opt, images, tags, labels, test='image.query')
    i_db_data = Dataset(opt, images, tags, labels, test='image.db')
    t_query_data = Dataset(opt, images, tags, labels, test='text.query')
    t_db_data = Dataset(opt, images, tags, labels, test='text.db')

    i_query_dataloader = DataLoader(i_query_data,
                                    opt.batch_size,
                                    shuffle=False)
    i_db_dataloader = DataLoader(i_db_data, opt.batch_size, shuffle=False)
    t_query_dataloader = DataLoader(t_query_data,
                                    opt.batch_size,
                                    shuffle=False)
    t_db_dataloader = DataLoader(t_db_data, opt.batch_size, shuffle=False)

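    # Generate binary hash codes for the query and database sets of both modalities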
    qBX = generate_img_code(generator, i_query_dataloader, opt.query_size)
    qBY = generate_txt_code(generator, t_query_dataloader, opt.query_size)
    rBX = generate_img_code(generator, i_db_dataloader, opt.db_size)
    rBY = generate_txt_code(generator, t_db_dataloader, opt.db_size)

    query_labels, db_labels = i_query_data.get_labels()
    query_labels = query_labels.to(opt.device)
    db_labels = db_labels.to(opt.device)

    mapi2t = calc_map_k(qBX, rBY, query_labels, db_labels)
    mapt2i = calc_map_k(qBY, rBX, query_labels, db_labels)
    print('...test MAP: MAP(i->t): %3.4f, MAP(t->i): %3.4f' % (mapi2t, mapt2i))
Example #3
def train(args):
    train_transforms = transforms.Compose([
        transforms.Resize(args.image_shape),
        transforms.RandomHorizontalFlip(),
        transforms.Normalize()
    ])

    eval_transforms = transforms.Compose(
        [transforms.Resize(args.image_shape),
         transforms.Normalize()])

    train_dataset = Dataset(
        data_dir=args.data_dir,
        file_list=args.train_list,
        transforms=train_transforms,
        num_workers='auto',
        buffer_size=100,
        parallel_method='thread',
        shuffle=True)

    eval_dataset = None
    if args.val_list is not None:
        eval_dataset = Dataset(
            data_dir=args.data_dir,
            file_list=args.val_list,
            transforms=eval_transforms,
            num_workers='auto',
            buffer_size=100,
            parallel_method='thread',
            shuffle=False)

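    # Select the segmentation model according to --model_type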
    if args.model_type == 'HumanSegMobile':
        model = HumanSegMobile(num_classes=2)
    elif args.model_type == 'HumanSegLite':
        model = HumanSegLite(num_classes=2)
    elif args.model_type == 'HumanSegServer':
        model = HumanSegServer(num_classes=2)
    else:
        raise ValueError(
            "--model_type: {} is set wrong, it shold be one of ('HumanSegMobile', "
            "'HumanSegLite', 'HumanSegServer')".format(args.model_type))
    model.train(
        num_epochs=args.num_epochs,
        train_dataset=train_dataset,
        train_batch_size=args.batch_size,
        eval_dataset=eval_dataset,
        save_interval_epochs=args.save_interval_epochs,
        save_dir=args.save_dir,
        pretrained_weights=args.pretrained_weights,
        resume_weights=args.resume_weights,
        learning_rate=args.learning_rate,
        use_vdl=args.use_vdl)
    def _build_data_loaders(self):
        transforms = self._get_transforms()
        extraimg_transform = torchvision.transforms.Compose(
            transforms["frame"].transforms[1:])

        # DEFINE: DATASETS
        train_dataset = Dataset(
            data_dir=self.data_dir,
            transforms=transforms,
            load_files=self.data_config["load_files"],
        )

        test_dataset = Dataset(
            data_dir=self.test_data_dir,
            transforms=transforms,
            load_files=self.data_config["load_files"],
        )

        train_extraimg_dataset = IMGDataset(
            data_dir=self.extraimg_data_dir,
            data_type=self.extraimg_type,
            transform=extraimg_transform,
        )

        # DEFINE: DATA LOADER
        self.train_data_loader = DataLoader(
            dataset=train_dataset,
            batch_size=self.model_config["batch_size"],
            shuffle=True,
            pin_memory=True,
            num_workers=self.model_config["batch_size"] // 2,
        )

        self.test_data_loader = DataLoader(
            dataset=test_dataset,
            batch_size=self.model_config["batch_size"],
            shuffle=True,
            pin_memory=True,
            num_workers=self.model_config["batch_size"] // 2,
        )

        self.train_extraimg_data_loader = DataLoader(
            dataset=train_extraimg_dataset,
            batch_size=self.model_config["batch_size"],
            shuffle=True,
            pin_memory=True,
            num_workers=self.model_config["batch_size"] // 2,
        )

        self.test_data_loader_ = iter(self.test_data_loader)
Example #5
def gen_s_curve(rng, emissions):
    """Generate synthetic data from datasets generating process.
    """
    N = 500
    J = 100
    D = 2

    # Generate latent manifold.
    # -------------------------
    X, t = make_s_curve(N, random_state=rng)
    X = np.delete(X, obj=1, axis=1)
    X = X / np.std(X, axis=0)
    inds = t.argsort()
    X = X[inds]
    t = t[inds]

    # Generate kernel `K` and latent GP-distributed maps `F`.
    # -------------------------------------------------------
    K = kern.RBF(input_dim=D, lengthscale=1).K(X)
    F = rng.multivariate_normal(np.zeros(N), K, size=J).T

    # Generate emissions using `F` and/or `K`.
    # ----------------------------------------
    if emissions == 'bernoulli':
        P = logistic(F)
        Y = rng.binomial(1, P).astype(np.double)
        return Dataset('s-curve', False, Y, X, F, K, None, t)
    elif emissions == 'gaussian':
        Y = F + rng.normal(0, scale=0.5, size=F.shape)
        return Dataset('s-curve', False, Y, X, F, K, None, t)
    elif emissions == 'multinomial':
        C = 100
        pi = np.exp(F - logsumexp(F, axis=1)[:, None])
        Y = np.zeros(pi.shape)
        for n in range(N):
            Y[n] = rng.multinomial(C, pi[n])
        return Dataset('s-curve', False, Y, X, F, K, None, t)
    elif emissions == 'negbinom':
        P = logistic(F)
        R = np.arange(1, J + 1, dtype=float)
        Y = rng.negative_binomial(R, 1 - P)
        return Dataset('s-curve', False, Y, X, F, K, R, t)
    else:
        assert (emissions == 'poisson')
        theta = np.exp(F)
        Y = rng.poisson(theta)
        return Dataset('s-curve', False, Y, X, F, K, None, t)
    def __init__(self):
        dataset = Dataset('config_compas.json')
        x, y = dataset.get_data(readable=True)
        # r = "Af_vs_all"
        r = "Af_vs_Caucasian"
        # r = "all"
        x, y = get_dataframe(x, y, requested=r)
        self.finder = RelationshipsFinder(pd.concat([x, y], axis=1))
Example #7
def load_bridges():
    """Load NYC bridges dataset:

    https://data.cityofnewyork.us/Transportation/
      Bicycle-Counts-for-East-River-Bridges/gua4-p9wg
    """
    data = np.load('datasets/bridges.npy', allow_pickle=True)
    data = data[()]
    Y = data['Y']
    labels = data['labels']
    return Dataset('bridges', True, Y, labels=labels)
Example #8
def main():
    config = Config()

    create_dirs([config.summary_dir, config.checkpoint_dir])

    sess = tf.Session()

    train_data = Dataset(config.root,
                         config.train_image_file,
                         config.type,
                         transform=Augmentaton(size=config.resize,
                                               mean=config.means[config.type],
                                               std=config.stds[config.type]),
                         max_samples=None)
    valid_data = Dataset(config.root,
                         config.valid_image_file,
                         config.type,
                         transform=Augmentaton(size=config.resize,
                                               mean=config.means[config.type],
                                               std=config.stds[config.type]),
                         max_samples=None)
    train_data_loader = DataLoader(train_data)
    valid_data_loader = DataLoader(valid_data)

    model = DenseNet(config)

    logger = Logger(sess, config)

    trainer = DenseNetTrainer(sess, model, train_data_loader,
                              valid_data_loader, config, logger)

    model.load(sess)

    if config.phase == "train":
        trainer.train()

    elif config.phase == "test":
        trainer.test("prediction.csv")
def main(config_file):
    """
    :param config_file:
    :return:
    """
    tf.reset_default_graph()

    with open(config_file) as config_file:
        config = json.load(config_file)

    dset = Dataset(config['dset_name'], config['dset_config'])

    model_file = get_model_file(config)

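    # Build the model and the attack graph on the configured device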
    with tf.device(config['device']):
        model = construct_model(config['dset_name'])
        attack = construct_attack(model, config, dset)

    saver = tf.train.Saver()

    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        # Restore the checkpoint
        saver.restore(sess, model_file)

        # Iterate over the samples batch-by-batch
        num_eval_examples = config['num_eval_examples']
        eval_batch_size = config['eval_batch_size']
        num_batches = int(math.ceil(num_eval_examples / eval_batch_size))

        x_adv = []  # adv accumulator

        print('Iterating over {} batches'.format(num_batches))

        for ibatch in range(num_batches):
            bstart = ibatch * eval_batch_size
            bend = min(bstart + eval_batch_size, num_eval_examples)
            print('batch size: {}'.format(bend - bstart))

            x_batch, y_batch = dset.get_eval_data(bstart, bend)

            x_batch_adv = attack.perturb(x_batch, y_batch, sess)

            x_adv.append(x_batch_adv)

        print('Storing examples')
        path = data_path_join(config['store_adv_path'])
        x_adv = np.concatenate(x_adv, axis=0)
        np.save(path, x_adv)
        print('Examples stored in {}'.format(path))
Example #10
def evaluate(args):
    eval_transforms = transforms.Compose(
        [transforms.Resize(args.image_shape),
         transforms.Normalize()])

    eval_dataset = Dataset(data_dir=args.data_dir,
                           file_list=args.val_list,
                           transforms=eval_transforms,
                           num_workers='auto',
                           buffer_size=100,
                           parallel_method='thread',
                           shuffle=False)

    model = models.load_model(args.model_dir)
    model.evaluate(eval_dataset, args.batch_size)
Example #11
def load_congress():
    """Congress 109 data:

    https://github.com/jgscott/STA380/blob/master/data/congress109.csv
    https://github.com/jgscott/STA380/blob/master/data/congress109members.csv
    """
    df1 = pd.read_csv('datasets/congress109.csv')
    df2 = pd.read_csv('datasets/congress109members.csv')
    assert (len(df1) == len(df2))

    # Ensure same ordering.
    df1 = df1.sort_values(by='name')
    df2 = df2.sort_values(by='name')

    Y = df1.values[:, 1:].astype(int)
    labels = np.array([0 if x == 'R' else 1 for x in df2.party.values])
    return Dataset('congress109', True, Y, labels=labels)
def instantiate_net(args, Train=True):
    module = importlib.import_module('Models.Class' + args['3_model_class'])
    class_ = getattr(module, args['3_model_class'])
    my_net = class_()

    with_GT = True
    if args['3_dimension'] == '':
        rescale_factor = 'Default'
    else:
        rescale_factor = args['3_dimension']
    my_dataset = Dataset()
    if Train:
        my_dataset.load_train(args['3_ds'], rescale_factor)
    my_dataset.load_test(args['3_ds'], rescale_factor)
    my_net.create_model(args, my_dataset)

    return my_dataset, my_net
def load_test_dataset(data_dir, syntax, max_example_actions_num):
    # all with unary closures
    terminal_vocab_file = os.path.join(data_dir, 'terminal_vocab.txt')
    grammar_file = os.path.join(data_dir, 'grammar.txt.uc.bin')

    grammar = deserialize_from_file(grammar_file)
    terminal_vocab = Vocab(
        terminal_vocab_file,
        data=[Constants.UNK_WORD, Constants.EOS_WORD, Constants.PAD_WORD])
    vocab = Vocab(
        os.path.join(data_dir, 'vocab.txt'),
        data=[Constants.UNK_WORD, Constants.EOS_WORD, Constants.PAD_WORD])

    prefix = 'uc_' + syntax + '_'
    test_dir = os.path.join(data_dir, 'test')
    # `test_file` mirrors the naming convention used by load_dataset's cached splits
    test_file = os.path.join(test_dir, prefix + 'test.pth')
    test = Dataset(test_dir, 'test', grammar, vocab, terminal_vocab, syntax,
                   max_example_actions_num, True)
    torch.save(test, test_file)
    return test
Example #14
def evaluate(args):
    eval_transforms = transforms.Compose(
        [transforms.Resize((192, 192)),
         transforms.Normalize()])

    eval_dataset = Dataset(data_dir=args.data_dir,
                           file_list=args.quant_list,
                           transforms=eval_transforms,
                           num_workers='auto',
                           buffer_size=100,
                           parallel_method='thread',
                           shuffle=False)

    model = models.load_model(args.model_dir)
    model.export_quant_model(dataset=eval_dataset,
                             save_dir=args.save_dir,
                             batch_size=args.batch_size,
                             batch_nums=args.batch_nums)
    def train(self, config, word2vec, tokenizer):
        from datasets.dataset import Dataset, DatasetParam
        dataset_args = DatasetParam()
        dataset_args.output_dir = config['data_params']['output_dir']
        dataset_args.embed_dim = config['data_params']['embed_dim']
        dataset_args.max_sentence_len = config['data_params'][
            'max_sentence_len']
        dataset_args.min_word_freq = config['data_params']['min_word_freq']
        dataset_args.max_vocab_size = config['data_params']['max_vocab_size']
        dataset_args.test_rate = config['data_params']['test_rate']
        dataset_args.tokenizer = tokenizer
        dataset_args.data_dir = config['data_params']['data_dir']
        dataset_args.cate_list = config['model_params']['cate_list']
        dataset_args.word2vec_iterator = word2vec
        dataset_args.data_vocab_dir = config['data_params']['data_vocab_dir']
        dataset_args.data_vocab_tag = str(
            config['data_params']['data_vocab_tag'])
        dataset_args.data_file = config['data_params']['data_file']
        dataset = Dataset(dataset_args)
        train_set, test_set = dataset.buildWithAllData(False)
        x_train, y_train = zip(*train_set)
        x_train = np.array(x_train)
        y_train = np.array(y_train)
        x_test, y_test = zip(*test_set)
        x_test = np.array(x_test)
        y_test = np.array(y_test)

        # Load a Bernoulli naive Bayes classifier
        from sklearn.naive_bayes import BernoulliNB
        from sklearn.externals import joblib  # deprecated in newer scikit-learn; use `import joblib` instead
        classifier = BernoulliNB()

        # Train the classifier and save it
        classifier.fit(x_train, y_train)
        joblib.dump(classifier,
                    os.path.join(dataset_args.output_dir, 'bayes_model.m'))

        # Predict on the test set and compute accuracy
        y_ = classifier.predict(x_test)
        acc = np.mean(
            [1 if y_[i] == y_test[i] else 0 for i in range(y_test.shape[0])],
            axis=0)
        print("eval acc: %f" % acc)
Example #16
def get_dummy_data(domain, data_size, query_manager=None):
    dis = {}
    for attr, n in zip(domain.attrs, domain.shape):
        random_dist = np.random.exponential(10, n)
        random_dist = random_dist / np.sum(random_dist)
        dis[attr] = random_dist
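    # Sample each attribute independently from its randomly drawn categorical distribution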
    arr = [
        np.random.choice(n, data_size, p=dis[attr])
        for attr, n in zip(domain.attrs, domain.shape)
    ]
    values = np.array(arr).T
    df = pd.DataFrame(values, columns=domain.attrs)
    data = Dataset(df, domain)
    if query_manager is not None:
        ans = query_manager.get_answer(data)
        print("max answer: ", np.max(ans))
        plt.hist(ans)
        plt.show()

    return data
    def _get_transforms(self):
        # for normalizer of mel
        mel_data_loader = None
        if not os.path.isfile(self.data_config["mel_normalizer_savefile"]):
            mel_dataset = Dataset(
                data_dir=self.data_dir,
                transforms={},
                load_files=["log_mel_spec", "mel_if"],
            )
            mel_data_loader = DataLoader(
                dataset=mel_dataset,
                batch_size=self.model_config["batch_size"],
                shuffle=False,
                pin_memory=True,
                num_workers=self.model_config["batch_size"] // 2,
            )

        # Data definitions
        frame_transforms = [
            torchvision.transforms.ToPILImage(),
            torchvision.transforms.Resize((256, 256)),
            torchvision.transforms.RandomHorizontalFlip(p=0.5),
            torchvision.transforms.ToTensor(),
            torchvision.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ]
        if self.model_config["flip"] is not True:
            flip_transform = frame_transforms.pop(1)
            assert isinstance(flip_transform,
                              torchvision.transforms.RandomHorizontalFlip)

        transforms = {
            "frame":
            torchvision.transforms.Compose(frame_transforms),
            "mel":
            MelNormalizer(
                dataloader=mel_data_loader,
                savefile_path=self.data_config["mel_normalizer_savefile"],
            ),
        }

        return transforms
Example #18
def main(config_file):
    """
    :param config_file:
    :return:
    """
    # deallocate memory if any
    tf.reset_default_graph()
    #free_gpus()

    # load configs.
    with open(config_file) as config_file:
        config = json.load(config_file)

    # load dataset
    dset = Dataset(config['dset_name'], config['dset_config'])

    with tf.device(config['device']):
        model = construct_model(config['dset_name'])

    x_adv = np.load(data_path_join(config['store_adv_path']))

    model_file = get_model_file(config)

    num_eval_examples = config['num_eval_examples']
    eval_batch_size = config['eval_batch_size']
    target_shape = (num_eval_examples, ) + get_dataset_shape(
        config['dset_name'])

    check_values(x_adv, dset.min_value, dset.max_value)
    check_shape(x_adv, target_shape)

    res = get_res(model_file,
                  x_adv,
                  config['attack_config']['epsilon'],
                  model,
                  dset,
                  num_eval_examples=num_eval_examples,
                  eval_batch_size=eval_batch_size)

    return res
Example #19
def _build_data_loader(data_dir, batch_size=256, sr=16000):
    transforms = {}
    if sr != 32000:
        transforms["audio"] = lambda audio: librosa.resample(audio, sr, 32000)
        print(f"[!] sr: {sr} -> 32000")

    # DEFINE: DATASETS
    train_dataset = Dataset(
        data_dir=data_dir,
        transforms=transforms,
        load_files=["audio"],
    )

    # DEFINE: DATA LOADER
    train_data_loader = DataLoader(
        dataset=train_dataset,
        batch_size=batch_size,
        shuffle=False,
        pin_memory=True,
        num_workers=batch_size // 2,
    )
    return train_data_loader
def get_loader(csv_dir, split, resample, slices_per_example, batch_size,
               num_workers, toy, input_scan, output_scan):
    """Initialize the data loader"""
    csv_dir = Path(csv_dir)

    # Default csv path is csv_dir / train.csv or whatever split is
    csv_path = str(csv_dir / f'{split}.csv')

    dataset = Dataset(csv_path=csv_path,
                      split=split,
                      toy=toy,
                      input_scan=input_scan,
                      output_scan=output_scan,
                      resample=resample,
                      num_slices=slices_per_example)
    loader = data.DataLoader(dataset,
                             batch_size=batch_size,
                             drop_last=False,
                             pin_memory=True,
                             num_workers=num_workers)

    return loader
Example #21
def evaluate():
    config = Config()
    valid_data = Dataset(config.root,
                         valid_image_paths,
                         config.type,
                         transform=Augmentaton(size=config.resize,
                                               mean=config.means[config.type],
                                               std=config.stds[config.type]),
                         max_samples=10)
    valid_data_loader = DataLoader(valid_data)

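    # Build the session, model, logger, and trainer (the validation loader serves as both loaders)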
    sess = tf.Session()
    model = DenseNet(config)
    logger = Logger(sess, config)
    trainer = DenseNetTrainer(sess, model, valid_data_loader,
                              valid_data_loader, config, logger)

    model.load(sess)

    if config.phase == "train":
        trainer.train()

    elif config.phase == "test":
        trainer.test(output_prediction_path)
Example #22
def get_dummy_data2(domain, data_size, query_manager, display=False):
    num_attr = len(domain.attrs)

    bag = {}
    for i in range(len(query_manager.workloads)):
        if len(bag) >= num_attr // 2: break
        for attr in query_manager.workloads[i]:
            id = query_manager.att_id[attr]
            if id not in bag:
                attr_size = domain.shape[id]
                bag[id] = np.random.randint(0, attr_size)

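    # Generate data_size synthetic rows based on the attribute values fixed in `bag`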
    arr = []
    for _ in range(data_size):
        arr.append(get_dummy_row(domain, bag))
    values = np.array(arr)
    df = pd.DataFrame(values, columns=domain.attrs)
    data = Dataset(df, domain)
    if display:
        ans = query_manager.get_answer(data)
        print("max answer: ", np.max(ans))
        plot_bins(ans, title='Dummy')

    return data
Example #23
def main(config_file):
    np.random.seed(1)
    tf.reset_default_graph()
    config = load_config(config_file)

    # dataset
    dset_name = config['dset_name']
    dset = Dataset(dset_name, config['dset_config'])
    dset_shape = get_dataset_shape(config['dset_name'])
    dim = np.prod(dset_shape)

    # model and computational graph
    model_file = get_model_file(config)
    with tf.device(config['device']):
        model = construct_model(dset_name)
        grad = tf.gradients(model.xent, model.x_input)[0]
        flat_grad = tf.reshape(grad, [NUM_SAMPLES, -1])
        flat_sgn = tf_nsign(flat_grad)
        norm_flat_grad = tf.div(flat_grad, tf.norm(flat_grad, axis=1, keepdims=True))

        sim_mat = tf.matmul(norm_flat_grad, norm_flat_grad, transpose_b=True)
        sims = tf.gather_nd(sim_mat, list(zip(*np.triu_indices(NUM_SAMPLES, k=1))))

        dist_mat = (dim - tf.matmul(flat_sgn, flat_sgn, transpose_b=True)) / 2.0
        dists = tf.gather_nd(dist_mat, list(zip(*np.triu_indices(NUM_SAMPLES, k=1))))

    saver = tf.train.Saver()
    writer = tf.summary.FileWriter(
        data_path_join("hamming_dist_exp")
    )

    epsilon = config['attack_config']['epsilon']
    num_batches = int(math.ceil(NUM_EVAL_EXAMPLES / EVAL_BATCH_SIZE))

    for _epsilon in np.linspace(epsilon/10, epsilon, 3):
        # histogram recorder
        tf.summary.histogram(
            "{}_hamming_dist_xr_sgn_grad_eps_{}_{}_samples_{}_pts".format(dset_name, _epsilon, NUM_SAMPLES, NUM_EVAL_EXAMPLES),
            dists
        )

        tf.summary.histogram(
            "{}_cosine_sim_xr_grad_eps_{}_{}_samples_{}_pts".format(dset_name, _epsilon, NUM_SAMPLES, NUM_EVAL_EXAMPLES),
            sims
        )

        summs = tf.summary.merge_all()

        with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            # Restore the checkpoint
            saver.restore(sess, model_file)
            # Iterate over the data points one-by-one

            print('Iterating over {} batches'.format(num_batches))

            for ibatch in range(num_batches):
                bstart = ibatch * EVAL_BATCH_SIZE
                bend = min(bstart + EVAL_BATCH_SIZE, NUM_EVAL_EXAMPLES)
                print('batch size: {}'.format(bend - bstart))

                x_batch, y_batch = dset.get_eval_data(bstart, bend)

                xr_batch = np.clip(
                    x_batch + np.random.uniform(-_epsilon, _epsilon, [NUM_SAMPLES, *x_batch.shape[1:]]),
                    dset.min_value,
                    dset.max_value
                )
                yr_batch = y_batch.repeat(NUM_SAMPLES)

                summ_val = sess.run(summs, feed_dict={
                    model.x_input: xr_batch,
                    model.y_input: yr_batch
                })

                writer.add_summary(summ_val, global_step=ibatch)
Example #24
        state = {
            'agent': agent_state_dict,
            'epoch': epoch,
            'reward': reward,
            'dice': dice
        }
        torch.save(
            state, args.cv_dir + '/ckpt_E_%d_D_%.3f_R_%.2E_S_%.2f_#_%d.t7' %
            (epoch, dice, reward, sparsity, len(policy_set)))
        torch.save(state, args.cv_dir + '/best.t7')


best_dice = 0.0
if __name__ == '__main__':
    # define datasets
    train_ds = Dataset(os.path.join(args.data_dir, 'train', 'ct'),
                       os.path.join(args.data_dir, 'train', 'seg'))
    test_ds = Dataset(os.path.join(args.data_dir, 'test', 'ct'),
                      os.path.join(args.data_dir, 'test', 'seg'),
                      test=True)

    # define data loader
    trainloader = DataLoader(train_ds,
                             args.batch_size,
                             shuffle=True,
                             num_workers=0,
                             pin_memory=True)
    testloader = DataLoader(test_ds,
                            args.batch_size,
                            shuffle=False,
                            num_workers=0,
                            pin_memory=True)
Example #25
def train(**kwargs):
    opt.parse(kwargs)

    if opt.vis_env:
        vis = Visualizer(opt.vis_env, port=opt.vis_port)

    if opt.device is None or opt.device == 'cpu':
        opt.device = torch.device('cpu')
    else:
        opt.device = torch.device(opt.device)

    images, tags, labels = load_data(opt.data_path, type=opt.dataset)
    train_data = Dataset(opt, images, tags, labels)
    train_dataloader = DataLoader(train_data, batch_size=opt.batch_size, shuffle=True)
    L = train_data.get_labels()
    L = L.to(opt.device)
    # test
    i_query_data = Dataset(opt, images, tags, labels, test='image.query')
    i_db_data = Dataset(opt, images, tags, labels, test='image.db')
    t_query_data = Dataset(opt, images, tags, labels, test='text.query')
    t_db_data = Dataset(opt, images, tags, labels, test='text.db')

    i_query_dataloader = DataLoader(i_query_data, opt.batch_size, shuffle=False)
    i_db_dataloader = DataLoader(i_db_data, opt.batch_size, shuffle=False)
    t_query_dataloader = DataLoader(t_query_data, opt.batch_size, shuffle=False)
    t_db_dataloader = DataLoader(t_db_data, opt.batch_size, shuffle=False)

    query_labels, db_labels = i_query_data.get_labels()
    query_labels = query_labels.to(opt.device)
    db_labels = db_labels.to(opt.device)

    pretrain_model = load_pretrain_model(opt.pretrain_model_path)

    generator = GEN(opt.dropout, opt.image_dim, opt.text_dim, opt.hidden_dim, opt.bit, opt.num_label, pretrain_model=pretrain_model).to(opt.device)

    discriminator = DIS(opt.hidden_dim//4, opt.hidden_dim//8, opt.bit).to(opt.device)

    optimizer = Adam([
        # {'params': generator.cnn_f.parameters()},     ## cnn_f parameters are frozen (excluded from the optimizer)
        {'params': generator.image_module.parameters()},
        {'params': generator.text_module.parameters()},
        {'params': generator.hash_module.parameters()}
    ], lr=opt.lr, weight_decay=0.0005)

    optimizer_dis = {
        'feature': Adam(discriminator.feature_dis.parameters(), lr=opt.lr, betas=(0.5, 0.9), weight_decay=0.0001),
        'hash': Adam(discriminator.hash_dis.parameters(), lr=opt.lr, betas=(0.5, 0.9), weight_decay=0.0001)
    }

    tri_loss = TripletLoss(opt, reduction='sum')

    loss = []

    max_mapi2t = 0.
    max_mapt2i = 0.
    max_average = 0.

    mapt2i_list = []
    mapi2t_list = []
    train_times = []

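    # Initialize the binary codes B and continuous code buffers H for both modalities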
    B_i = torch.randn(opt.training_size, opt.bit).sign().to(opt.device)
    B_t = B_i
    H_i = torch.zeros(opt.training_size, opt.bit).to(opt.device)
    H_t = torch.zeros(opt.training_size, opt.bit).to(opt.device)

    for epoch in range(opt.max_epoch):
        t1 = time.time()
        e_loss = 0
        for i, (ind, img, txt, label) in tqdm(enumerate(train_dataloader)):
            imgs = img.to(opt.device)
            txt = txt.to(opt.device)
            labels = label.to(opt.device)

            batch_size = len(ind)

            h_i, h_t, f_i, f_t = generator(imgs, txt)
            H_i[ind, :] = h_i.data
            H_t[ind, :] = h_t.data
            h_t_detach = generator.generate_txt_code(txt)

            #####
            # train feature discriminator
            #####
            D_real_feature = discriminator.dis_feature(f_i.detach())
            D_real_feature = -opt.gamma * torch.log(torch.sigmoid(D_real_feature)).mean()
            # D_real_feature = -D_real_feature.mean()
            optimizer_dis['feature'].zero_grad()
            D_real_feature.backward()

            # train with fake
            D_fake_feature = discriminator.dis_feature(f_t.detach())
            D_fake_feature = -opt.gamma * torch.log(torch.ones(batch_size).to(opt.device) - torch.sigmoid(D_fake_feature)).mean()
            # D_fake_feature = D_fake_feature.mean()
            D_fake_feature.backward()

            # train with gradient penalty
            alpha = torch.rand(batch_size, opt.hidden_dim//4).to(opt.device)
            interpolates = alpha * f_i.detach() + (1 - alpha) * f_t.detach()
            interpolates.requires_grad_()
            disc_interpolates = discriminator.dis_feature(interpolates)
            gradients = autograd.grad(outputs=disc_interpolates, inputs=interpolates,
                                      grad_outputs=torch.ones(disc_interpolates.size()).to(opt.device),
                                      create_graph=True, retain_graph=True, only_inputs=True)[0]
            gradients = gradients.view(gradients.size(0), -1)
            # 10 is gradient penalty hyperparameter
            feature_gradient_penalty = ((gradients.norm(2, dim=1) - 1) ** 2).mean() * 10
            feature_gradient_penalty.backward()

            optimizer_dis['feature'].step()

            #####
            # train hash discriminator
            #####
            D_real_hash = discriminator.dis_hash(h_i.detach())
            D_real_hash = -opt.gamma * torch.log(torch.sigmoid(D_real_hash)).mean()
            optimizer_dis['hash'].zero_grad()
            D_real_hash.backward()

            # train with fake
            D_fake_hash = discriminator.dis_hash(h_t.detach())
            D_fake_hash = -opt.gamma * torch.log(torch.ones(batch_size).to(opt.device) - torch.sigmoid(D_fake_hash)).mean()
            D_fake_hash.backward()

            # train with gradient penalty
            alpha = torch.rand(batch_size, opt.bit).to(opt.device)
            interpolates = alpha * h_i.detach() + (1 - alpha) * h_t.detach()
            interpolates.requires_grad_()
            disc_interpolates = discriminator.dis_hash(interpolates)
            gradients = autograd.grad(outputs=disc_interpolates, inputs=interpolates,
                                      grad_outputs=torch.ones(disc_interpolates.size()).to(opt.device),
                                      create_graph=True, retain_graph=True, only_inputs=True)[0]
            gradients = gradients.view(gradients.size(0), -1)

            hash_gradient_penalty = ((gradients.norm(2, dim=1) - 1) ** 2).mean() * 10
            hash_gradient_penalty.backward()

            optimizer_dis['hash'].step()

            loss_G_txt_feature = -torch.log(torch.sigmoid(discriminator.dis_feature(f_t))).mean()
            loss_adver_feature = loss_G_txt_feature

            loss_G_txt_hash = -torch.log(torch.sigmoid(discriminator.dis_hash(h_t_detach))).mean()
            loss_adver_hash = loss_G_txt_hash

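            # Cross-modal triplet losses between image and text hash codes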
            tri_i2t = tri_loss(h_i, labels, target=h_t, margin=opt.margin)
            tri_t2i = tri_loss(h_t, labels, target=h_i, margin=opt.margin)
            weighted_cos_tri = tri_i2t + tri_t2i

            i_ql = torch.sum(torch.pow(B_i[ind, :] - h_i, 2))
            t_ql = torch.sum(torch.pow(B_t[ind, :] - h_t, 2))
            loss_quant = i_ql + t_ql
            err = opt.alpha * weighted_cos_tri + \
                  opt.beta * loss_quant + opt.gamma * (loss_adver_feature + loss_adver_hash)

            optimizer.zero_grad()
            err.backward()
            optimizer.step()

            e_loss = err + e_loss

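        # Closed-form update of the label-projection matrices P, then refresh the binary codes B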
        P_i = torch.inverse(
                L.t() @ L + opt.lamb * torch.eye(opt.num_label, device=opt.device)) @ L.t() @ B_i
        P_t = torch.inverse(
                L.t() @ L + opt.lamb * torch.eye(opt.num_label, device=opt.device)) @ L.t() @ B_t

        B_i = (L @ P_i + opt.mu * H_i).sign()
        B_t = (L @ P_t + opt.mu * H_t).sign()
        loss.append(e_loss.item())
        print('...epoch: %3d, loss: %3.3f' % (epoch + 1, loss[-1]))
        delta_t = time.time() - t1

        if opt.vis_env:
            vis.plot('loss', loss[-1])

        # validate
        if opt.valid and (epoch + 1) % opt.valid_freq == 0:
            mapi2t, mapt2i = valid(generator, i_query_dataloader, i_db_dataloader, t_query_dataloader, t_db_dataloader,
                                   query_labels, db_labels)
            print('...epoch: %3d, valid MAP: MAP(i->t): %3.4f, MAP(t->i): %3.4f' % (epoch + 1, mapi2t, mapt2i))

            mapi2t_list.append(mapi2t)
            mapt2i_list.append(mapt2i)
            train_times.append(delta_t)

            if 0.5 * (mapi2t + mapt2i) > max_average:
                max_mapi2t = mapi2t
                max_mapt2i = mapt2i
                max_average = 0.5 * (mapi2t + mapt2i)
                save_model(generator)

            if opt.vis_env:
                vis.plot('mapi2t', mapi2t)
                vis.plot('mapt2i', mapt2i)

        if epoch % 100 == 0:
            for params in optimizer.param_groups:
                params['lr'] = max(params['lr'] * 0.8, 1e-6)

    if not opt.valid:
        save_model(generator)

    print('...training procedure finish')
    if opt.valid:
        print('   max MAP: MAP(i->t): %3.4f, MAP(t->i): %3.4f' % (max_mapi2t, max_mapt2i))
    else:
        mapi2t, mapt2i = valid(generator, i_query_dataloader, i_db_dataloader, t_query_dataloader, t_db_dataloader,
                               query_labels, db_labels)
        print('   max MAP: MAP(i->t): %3.4f, MAP(t->i): %3.4f' % (mapi2t, mapt2i))

    path = 'checkpoints/' + opt.dataset + '_' + str(opt.bit)
    with open(os.path.join(path, 'result.pkl'), 'wb') as f:
        pickle.dump([train_times, mapi2t_list, mapt2i_list], f)
def main(config_file):
    np.random.seed(1)
    tf.reset_default_graph()

    config = load_config(config_file)

    dset_name = config['dset_name']
    dset = Dataset(dset_name, config['dset_config'])
    model_file = get_model_file(config)
    epsilon = config['attack_config']['epsilon']

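    # Magnitude of the input gradient, recorded below as partial-derivative histograms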
    with tf.device(config['device']):
        model = construct_model(dset_name)
        abs_grad = tf.abs(tf.gradients(model.xent, model.x_input)[0])

    # histogram recorder
    # place holder for dx at x0 and x_rand
    dxo = tf.placeholder(tf.float32, shape=get_dataset_shape(dset_name))
    tf.summary.histogram("{}_part_deriv_mag_xo".format(dset_name), dxo)

    dxr = tf.placeholder(tf.float32, shape=get_dataset_shape(dset_name))
    tf.summary.histogram("{}_part_deriv_mag_xr".format(dset_name), dxr)

    writer = tf.summary.FileWriter(
        data_path_join("partial_derivative_exp")
    )
    summaries = tf.summary.merge_all()
    saver = tf.train.Saver()

    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        # Restore the checkpoint
        saver.restore(sess, model_file)
        # Iterate over the samples batch-by-batch
        eval_batch_size = config['eval_batch_size']
        num_batches = int(math.ceil(NUM_EVAL_EXAMPLES / eval_batch_size))

        #dxs = None  # grads accumulator

        print('Iterating over {} batches'.format(num_batches))

        for ibatch in range(num_batches):
            bstart = ibatch * eval_batch_size
            bend = min(bstart + eval_batch_size, NUM_EVAL_EXAMPLES)
            print('batch size: {}'.format(bend - bstart))

            x_batch, y_batch = dset.get_eval_data(bstart, bend)
            xr_batch = np.clip(x_batch + np.random.uniform(-epsilon, epsilon, x_batch.shape),
                               dset.min_value,
                               dset.max_value)
            #print(y_batch)
            dxo_batch = sess.run(abs_grad, feed_dict={
                model.x_input: x_batch,
                model.y_input: y_batch
            })

            dxr_batch = sess.run(abs_grad, feed_dict={
                model.x_input: xr_batch,
                model.y_input: y_batch
            })

            for i, step in enumerate(range(bstart, bend)):
                summ = sess.run(summaries, feed_dict={dxo: dxo_batch[i],
                                                      dxr: dxr_batch[i]})
                writer.add_summary(summ, global_step=step)
Example #27
def generate(real_answers: np.array,
             N: int,
             domain: Domain,
             query_manager: QueryManager,
             epsilon: float,
             delta: float,
             epsilon_split: float,
             noise_multiple: float,
             samples: int,
             alpha=0,
             show_progress=True):
    assert epsilon_split > 0
    assert noise_multiple > 0
    neg_real_answers = 1 - real_answers
    D = np.sum(domain.shape)
    Q_size = query_manager.num_queries

    prev_queries = []
    neg_queries = []

    final_oh_fake_data = []  # stores the final data
    '''
    Calculate the total number of rounds using advanced composition
    '''
    T, epsilon_0 = get_iters(epsilon, delta, epsilon_split)

    # print(f'epsilon_0 = {epsilon_0}')
    exponential_scale = np.sqrt(T) * noise_multiple
    if show_progress: progress_bar = tqdm(total=T)
    for t in range(T):
        """
        Sample s times from FTPL
        """
        util2.blockPrint()
        num_processes = 8
        s2 = int(1.0 + samples / num_processes)
        samples_rem = samples
        processes = []
        manager = mp.Manager()
        fake_temp = manager.list()

        query_workload, q_weights = query_manager.get_query_workload_weighted(
            prev_queries)
        neg_query_workload, n_weights = query_manager.get_query_workload_weighted(
            neg_queries)

        for __ in range(num_processes):
            temp_s = samples_rem if samples_rem - s2 < 0 else s2
            samples_rem -= temp_s
            noise = np.random.exponential(exponential_scale, (temp_s, D))
            proc = mp.Process(target=gen_fake_data,
                              args=(fake_temp, query_workload, q_weights,
                                    neg_query_workload, n_weights, noise,
                                    domain, alpha, temp_s))

            proc.start()
            processes.append(proc)

        assert samples_rem == 0, "samples_rem = {}".format(samples_rem)
        for p in processes:
            p.join()

        util2.enablePrint()
        oh_fake_data = []
        assert len(fake_temp) > 0
        for x in fake_temp:
            oh_fake_data.append(x)
            final_oh_fake_data.append(x)

        assert len(oh_fake_data) == samples, \
            "len(D_hat) = {} len(fake_temp) = {}".format(
                len(oh_fake_data), len(fake_temp))
        for i in range(samples):
            assert len(oh_fake_data[i]) == D, "D_hat dim = {}".format(
                len(oh_fake_data[0]))
        # assert not final_oh_fake_data or len(final_oh_fake_data[0][1]) == D, "D_hat dim = {}".format(len(oh_fake_data[0]))
        fake_data = Dataset(
            pd.DataFrame(util2.decode_dataset(oh_fake_data, domain),
                         columns=domain.attrs), domain)
        """
        Compute Exponential Mechanism distribution
        """
        fake_answers = query_manager.get_answer(fake_data)
        neg_fake_answers = 1 - fake_answers

        score = np.append(real_answers - fake_answers,
                          neg_real_answers - neg_fake_answers)

        EM_dist_0 = np.exp(epsilon_0 * score * N / 2, dtype=np.float128)
        sum = np.sum(EM_dist_0)
        assert sum > 0
        assert not np.isinf(sum)
        EM_dist = EM_dist_0 / sum
        assert not np.isnan(
            EM_dist).any(), "EM_dist_0 = {} EM_dist = {} sum = {}".format(
                EM_dist_0, EM_dist, sum)
        assert not np.isinf(
            EM_dist).any(), "EM_dist_0 = {} EM_dist = {} sum = {}".format(
                EM_dist_0, EM_dist, sum)
        """
        Sample from EM
        """
        q_t_ind = util2.sample(EM_dist)

        if q_t_ind < Q_size:
            prev_queries.append(q_t_ind)
        else:
            neg_queries.append(q_t_ind - Q_size)

        if show_progress:
            progress_bar.update()
            progress_bar.set_postfix({
                'max error': f'{np.max(score):.3f}',
                'round error': f'{score[q_t_ind]:.3f}'
            })

    if show_progress: progress_bar.close()

    final_fem_data = Dataset(
        pd.DataFrame(util2.decode_dataset(final_oh_fake_data, domain),
                     columns=domain.attrs), domain)
    return final_fem_data
def load_dataset(config, force_regenerate=False):
    dj_dir = './preprocessed/django'
    logging.info('=' * 80)
    logging.info('Loading datasets from folder ' + dj_dir)
    logging.info('=' * 80)
    train, test, dev = None, None, None
    prefix = config.syntax + '_'
    if config.unary_closures:
        prefix += 'uc_'

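    # Load cached .pth splits when available; any missing split is regenerated below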
    train_dir = os.path.join(dj_dir, 'train')
    train_file = os.path.join(train_dir, prefix + 'train.pth')
    if not force_regenerate and os.path.isfile(train_file):
        logging.info('Train dataset found, loading...')
        train = torch.load(train_file)
        train.config = config

    test_dir = os.path.join(dj_dir, 'test')
    test_file = os.path.join(test_dir, prefix + 'test.pth')
    if not force_regenerate and os.path.isfile(test_file):
        logging.info('Test dataset found, loading...')
        test = torch.load(test_file)
        test.config = config

    dev_dir = os.path.join(dj_dir, 'dev')
    dev_file = os.path.join(dev_dir, prefix + 'dev.pth')
    if not force_regenerate and os.path.isfile(dev_file):
        logging.info('Dev dataset found, loading...')
        dev = torch.load(dev_file)
        dev.config = config

    if train is None or test is None or dev is None:
        terminal_vocab_file = os.path.join(dj_dir, 'terminal_vocab.txt')
        if config.unary_closures:
            grammar_file = os.path.join(dj_dir, 'grammar.txt.uc.bin')
        else:
            grammar_file = os.path.join(dj_dir, 'grammar.txt.bin')

        grammar = deserialize_from_file(grammar_file)
        terminal_vocab = Vocab(
            terminal_vocab_file,
            data=[Constants.UNK_WORD, Constants.EOS_WORD, Constants.PAD_WORD])
        vocab = Vocab(
            os.path.join(dj_dir, 'vocab.txt'),
            data=[Constants.UNK_WORD, Constants.EOS_WORD, Constants.PAD_WORD])

        if test is None:
            logging.info('Test dataset not found, generating...')
            test = Dataset(test_dir, 'test', grammar, vocab, terminal_vocab,
                           config.syntax, config.max_example_action_num,
                           config.unary_closures)
            torch.save(test, test_file)

        if dev is None:
            logging.info('Dev dataset not found, generating...')
            dev = Dataset(dev_dir, 'dev', grammar, vocab, terminal_vocab,
                          config.syntax, config.max_example_action_num,
                          config.unary_closures)
            torch.save(dev, dev_file)

        if train is None:
            logging.info('Train dataset not found, generating...')
            train = Dataset(train_dir, 'train', grammar, vocab, terminal_vocab,
                            config.syntax, config.max_example_action_num,
                            config.unary_closures)
            torch.save(train, train_file)

    train.prepare_torch(config.cuda)
    dev.prepare_torch(config.cuda)
    test.prepare_torch(config.cuda)
    return train, dev, test
from plotly.subplots import make_subplots

from datasets.dataset import Dataset, DatasetConfig
from graphics.graphs import draw_data_points, draw_loss_function, prepare_frame
from losses.loss_function import Loss
from models import linear

if __name__ == '__main__':
    # Generate the dataset
    dataset = Dataset(conf=DatasetConfig.load('apartment_prices'))

    # Build theoretical loss function
    loss_function = Loss(dataset=dataset, use_intercept=False)

    # Train the model
    a_hist, loss_hist = linear.train(dataset,
                                     epochs=100,
                                     lr=0.0004,
                                     early_stopping_delta=100)

    fig = make_subplots(rows=1, cols=2)
    draw_data_points(dataset=dataset, figure=fig)
    draw_loss_function(loss_function=loss_function, figure=fig)
    fig.update(
        frames=[prepare_frame(a, loss) for a, loss in zip(a_hist, loss_hist)])
    fig.update_layout(updatemenus=[
        dict(type="buttons",
             buttons=[dict(label="Train", method="animate", args=[None])])
    ],
                      showlegend=False)
    fig.show()
Example #30
    store_name = os.path.join(data_dir, '{}_tbl.h5'.format(exp_id))
    offset = 0
    # Rewrite all the results; alternatively, one could use `offset` to append to the h5 file above.
    if os.path.exists(store_name):
        os.remove(store_name)

    for _cf in cfs:
        # for reproducibility
        np.random.seed(1)
        config_file = config_path_join(_cf)
        tf.reset_default_graph()

        with open(config_file) as config_file:
            config = json.load(config_file)

        dset = Dataset(config['dset_name'], config['dset_config'])
        dset_dim = np.prod(get_dataset_shape(config['dset_name']))

        model_file = get_model_file(config)
        with tf.device(config['device']):
            model = construct_model(config['dset_name'])
            flat_est_grad = tf.placeholder(tf.float32, shape=[None, dset_dim])
            flat_grad = tf.reshape(
                tf.gradients(model.xent, model.x_input)[0], [-1, dset_dim])
            norm_flat_grad = tf.maximum(
                tf.norm(flat_grad, axis=1, keepdims=True),
                np.finfo(np.float64).eps)
            norm_flat_est_grad = tf.maximum(
                tf.norm(flat_est_grad, axis=1, keepdims=True),
                np.finfo(np.float64).eps)
            cos_sim = tf.reduce_sum(tf.multiply(