Example #1
def train_composition(dataset, transformation_list):
    """
    Train a model on a dataset to which a sequence of transformations has been applied.
    :param dataset: the original dataset
    :param transformation_list: the sequence of transformations
    :return: None; the trained model is saved to disk and evaluated on the transformed test set
    """
    # Apply a sequence of transformations
    (X_train, Y_train), (X_test, Y_test) = load_data(dataset)
    X_train = transform(X_train, transformation_list)

    nb_examples, img_rows, img_cols, nb_channels = X_train.shape
    nb_classes = Y_train.shape[1]
    input_shape = (img_rows, img_cols, nb_channels)

    # Train a model and save
    model_name = 'model-{}-cnn-{}'.format(dataset, 'composition')
    require_preprocess = (dataset == DATA.cifar_10)

    model = models.create_model(dataset, input_shape, nb_classes)
    models.train(model, X_train, Y_train, model_name, require_preprocess)
    # save to disk
    models.save_model(model, model_name)

    # evaluate the new model
    loaded_model = models.load_model(model_name)
    X_test = transform(X_test, transformation_list)

    if require_preprocess:
        X_test = normalize(X_test)

    scores = loaded_model.evaluate(X_test, Y_test, verbose=2)
    print('*** Evaluating the new model: {}'.format(scores))
    del loaded_model
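
A minimal usage sketch for the helper above, assuming the project's DATA and TRANSFORMATION constants; the two transformation members named below are hypothetical placeholders (only TRANSFORMATION.clean appears elsewhere on this page):

if __name__ == '__main__':
    # hypothetical transformation names; substitute members that your TRANSFORMATION class actually defines
    composition = [TRANSFORMATION.rotate90, TRANSFORMATION.flip_horizontal]
    train_composition(DATA.mnist, composition)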
Example #2
def train_model(dataset, transform_type):
    """
    Train a model on the given dataset after applying the given transformation.
    :param dataset: the dataset to train on.
    :param transform_type: the transformation applied to the inputs.
    """
    print('Training model ({}) on {}...'.format(transform_type, dataset))
    (X_train, Y_train), (X_test, Y_test) = load_data(dataset)
    nb_examples, img_rows, img_cols, nb_channels = X_train.shape
    nb_classes = Y_train.shape[1]
    input_shape = (img_rows, img_cols, nb_channels)

    X_train = transform(X_train, transform_type)

    model_name = 'model-{}-cnn-{}'.format(dataset, transform_type)
    require_preprocess = (dataset == DATA.cifar_10)

    # train
    model = models.create_model(dataset, input_shape, nb_classes)
    models.train(model, X_train, Y_train, model_name, require_preprocess)
    # save to disk
    models.save_model(model, model_name)
    # evaluate the new model
    X_test = transform(X_test, transform_type)
    loaded_model = models.load_model(model_name)
    scores = loaded_model.evaluate(X_test, Y_test, verbose=2)
    print('*** Evaluating the new model: {}'.format(scores))
    del loaded_model
Example #3
def main():
    """
        USER CONTROLS
    """
    args = get_arguments()

    args, config, params, net_params = get_configs(args)

    # choose which training/validation pipeline to use
    if params['swa']:
        from pipeline_swa import train_val_pipeline_classification
    elif params['swag']:
        from pipeline_swag import train_val_pipeline_classification
    elif params['sgld'] or params['psgld']:
        from pipeline_sgld import train_val_pipeline_classification
    else:
        from pipeline import train_val_pipeline_classification

    # choose the GNN loader (the 'bbp' variant or the standard one)
    if params['bbp']:
        from nets.molecules_graph_regression.load_bbp_net import gnn_model  # imports all GNNs
    else:
        from nets.molecules_graph_regression.load_net import gnn_model  # imports all GNNs

    DATASET_NAME = config['dataset']
    MODEL_NAME = config['model']

    # setting seeds
    set_seed(params['seed'])
    print("Seed Number of Models: " + str(params['seed']))
    print("Data Seed Number: " + str(params['data_seed']))

    dataset = load_data(DATASET_NAME, args.num_train, args.num_val,
                        args.num_test, args.data_seed, params)

    # network parameters

    #   add task information for net_params loss
    net_params['task'] = 'classification'
    net_params['num_classes'] = dataset.num_classes
    net_params['num_atom_type'] = dataset.num_atom_type
    net_params['num_bond_type'] = dataset.num_bond_type
    out_dir = config['out_dir']

    root_ckpt_dir = out_dir + 'checkpoints/' + MODEL_NAME + "_" + DATASET_NAME + "_GPU" +\
        str(config['gpu']['id']) + "_" + time.strftime('%Hh%Mm%Ss_on_%b_%d_%Y')
    write_file_name = out_dir + 'results/result_' + MODEL_NAME + "_" + DATASET_NAME + "_GPU" +\
        str(config['gpu']['id']) + "_" + time.strftime('%Hh%Mm%Ss_on_%b_%d_%Y')
    root_output_dir = out_dir + 'outputs/outputs_' + MODEL_NAME + "_" + DATASET_NAME + "_GPU" +\
        str(config['gpu']['id']) + "_" + time.strftime('%Hh%Mm%Ss_on_%b_%d_%Y')

    dirs = root_ckpt_dir, write_file_name, root_output_dir
    dirs = add_dir_name(dirs, MODEL_NAME, config, params, net_params)

    net_params['total_param'] = view_model_param(MODEL_NAME, net_params)
    train_val_pipeline_classification(MODEL_NAME, DATASET_NAME, dataset,
                                      config, params, net_params, dirs)
Example #4
def main(config):

    #Initialize Network
    net = Network(config)

    data = {}

    if config.run_mode == 'train':

        data['train'] = load_data(config, 'train')

        net.train(data)

    if config.run_mode == 'test':

        data['test'] = load_data(config, 'test')

        net.test(data)

    return 0
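
The snippet above only assumes a config object exposing run_mode; below is a minimal, hypothetical way to build one with argparse (the real project presumably has its own configuration loader):

import argparse

def get_config():
    # minimal stand-in for the project's real configuration loader (an assumption)
    parser = argparse.ArgumentParser()
    parser.add_argument('--run_mode', choices=['train', 'test'], default='train')
    return parser.parse_args()

if __name__ == '__main__':
    main(get_config())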
Example #5
def main():
    args = get_parameters()

    (x_train,
     y_train), (x_test,
                y_test), categories = load_data(args.data_set, args.train_size)

    x_full = np.concatenate((x_train, x_test))
    y_full = np.concatenate((y_train, y_test))

    if ',' not in args.mi_estimator:
        information_calculator = get_information_calculator(
            x_full, y_full, args.mi_estimator, args.bins)
        processor = InformationProcessorDeltaApprox(information_calculator)
    else:
        mies = args.mi_estimator.split(',')
        calculators = [
            get_information_calculator(x_full, y_full, mie, args.bins)
            for mie in mies
        ]
        ips = [InformationProcessorDeltaApprox(calc) for calc in calculators]
        processor = InformationProcessorUnion(ips)

    model = get_model_categorical(input_shape=x_train[0].shape,
                                  network_shape=args.shape,
                                  categories=categories,
                                  activation=args.activation)

    print("Training and Calculating mutual information")
    batch_size = min(args.batch_size,
                     len(x_train)) if args.batch_size > 0 else len(x_train)
    no_of_batches = math.ceil(len(x_train) / batch_size) * args.epochs
    information_callback = CalculateInformationCallback(
        model, processor, x_full)
    model.fit(
        x_train,
        y_train,
        batch_size=batch_size,
        callbacks=[information_callback,
                   ProgressBarCallback(no_of_batches)],
        epochs=args.epochs,
        validation_data=(x_test, y_test),
        verbose=0)

    append = ",b-" + str(information_callback.batch)
    print("Saving data to files")
    processor.save(args.dest + "/data/" + filename(args) + append)
    print("Producing image")
    processor.plot(args.dest + "/images/" + filename(args) + append)
    print("Done")
    return
Example #6
def main():
    DATA.set_current_dataset_name(DATA.mnist)

    # trans_types = TRANSFORMATION.supported_types()
    trans_types = [TRANSFORMATION.clean]

    adversary_types = ATTACK.get_AETypes()

    _, (X, Y) = load_data(DATA.CUR_DATASET_NAME)

    for transformation_type in trans_types:
        TRANSFORMATION.set_cur_transformation_type(transformation_type)
        try:
            # step 1. get a model.
            # case 1. train a new model
            model = train_model((X, Y), transformation_type)

            # or case 2. load an existing model
            # model = models.load_model('model-{}-cnn-{}'.format(DATA.CUR_DATASET_NAME,
            #                                                    transformation_type))

            # step 2. estimate the model
            for adversary in adversary_types:
                X_adv_file = 'test_AE-{}-cnn-clean-{}.npy'.format(
                    DATA.CUR_DATASET_NAME, adversary)
                print('Evaluating weak defenses on dataset [{}]'.format(
                    X_adv_file))
                X_adv_file = os.path.join(PATH.ADVERSARIAL_FILE, X_adv_file)
                X_adv = np.load(X_adv_file)

                test_model(model, copy.deepcopy((X_adv, Y)),
                           transformation_type)
                del X_adv, X_adv_file

            print('')
        except (FileNotFoundError, OSError) as e:
            print(e)
            print('')
            continue

        del model
Example #7
from models import APPNP, GAT, GCN, GFNN, MaskedGCN, MixHop, PPNP, SGC
from data.data import load_data
from train import Trainer
from utils import preprocess_features

import random
import numpy as np
import torch

SEED = 18
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

if __name__ == '__main__':
    data = load_data('cora')
    data.features = preprocess_features(data.features)
    model = GCN(data)
    trainer = Trainer(model,
                      data,
                      lr=0.01,
                      weight_decay=5e-4,
                      epochs=200,
                      patience=10,
                      niter=10,
                      verbose=True)
    trainer.run()
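
Because the import line also brings in GAT, APPNP, SGC and the other models, swapping architectures is presumably a one-line change, assuming those classes share the single-argument constructor shown for GCN (an assumption, not verified here):

model = GAT(data)  # assumed to take the same `data` object as GCN
trainer = Trainer(model, data, lr=0.01, weight_decay=5e-4,
                  epochs=200, patience=10, niter=10, verbose=True)
trainer.run()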
Example #8
def Train(cfg: dict) -> None:
    """
    Execute train process with the base configs.
    
    :param cfg: configuration dictionary (Base.yaml)
    """
    # Load train, dev data 
    X_train, y_train, decoder_input_array_train, mel_spectro_data_array_train, max_X, vocab_size_source = load_data(cfg=cfg, mode="train")
    X_dev, y_dev, decoder_input_array_dev, mel_spectro_data_array_dev = load_data(cfg=cfg, mode="dev")
    print("---------------------------------------------------")
    print("Complete: Load train, dev data")
    print("---------------------------------------------------")
    
    # Make result directories
    model_path = cfg["model_path"]
    make_dir(model_path) #"./Models/"
    result_path = cfg["result_path"]
    make_dir(result_path) # "./Models/result/"
    print("---------------------------------------------------")
    print("Complete: Make result directories")
    print("---------------------------------------------------")
     
    # Save real json, img, video before training
    json_path = result_path + "json/"
    make_dir(json_path) # "./Models/result/json/"
    img_path = result_path + "img_video/"
    make_dir(img_path) # "./Models/result/img_video/"

    data_path = cfg["data_path"] 
    with open(data_path + 'out_files_dev' +'.pickle', 'rb') as f:
        output_file = pickle.load(f)
    with open(data_path + 'out_gloss_dev' +'.pickle', 'rb') as f:
        output_gloss = pickle.load(f)
    with open(data_path + 'out_skels_dev' +'.pickle', 'rb') as f:
        output_skels = pickle.load(f)
    
    real_json_path = json_path + 'real/'
    make_dir(real_json_path)
    real_img_path = img_path + 'real/'
    make_dir(real_img_path)
    
    for i in range(len(X_dev)):
        leng = output_skels[i]
        real = y_dev[i].tolist()[:leng]
        filename = str(output_file[i]) + '_' + str(output_gloss[i]) + '_real' + '.json'
        
        with open(real_json_path + filename, 'w', encoding='utf-8') as make_file: 
            json.dump(real, make_file, indent="\t")
        
        #make img & video
        create_img_video(real_json_path, real_img_path, filename)
    print("---------------------------------------------------")
    print("Complete: Save real json, img and video files")
    print("---------------------------------------------------")
    
    # Build the tacotron model
    model = build_model(cfg=cfg, max_X=max_X, vocab_size_source=vocab_size_source)
    print("---------------------------------------------------")
    print("Complete: Build model")
    print("---------------------------------------------------")
    
    # Set Optimizer(Adam) and Loss(MSE)
    opt = Adam()
    model.compile(optimizer=opt,
                loss=['mean_squared_error', 'mean_squared_error']) # original was 'mean_absolute_error'

    # Set Callback options
    ### callback1: customized callback (save model and make prediction every 1000 epochs)
    first_callback = MyCallback('save_jsonfile', cfg, X_dev, y_dev, decoder_input_array_dev,
                                output_file, output_gloss, output_skels)

    ### callback2: best model save (update best model.h5 every 10 epochs)
    best_path = model_path + "best_model.h5"

    best_callback = tf.keras.callbacks.ModelCheckpoint(
        filepath=best_path,
        monitor='val_loss',
        save_best_only=True,
        save_weights_only=False,
        verbose=1,
        period=10)

    ### callback3: learning rate scheduler (scale LR by decrease_factor when val_loss has not improved for patience epochs)
    patience = cfg["training"].get("patience", 10)
    decrease_factor = cfg["training"].get("decrease_factor", 0.2)
    min_LR = cfg["training"].get("min_LR", 0.00001)

    reduceLR = ReduceLROnPlateau(
        monitor='val_loss',
        factor=decrease_factor,
        patience=patience,
        min_lr=min_LR)

    ### (optional callback)
    # 1. early stopping
    #early_stopping = tf.keras.callbacks.EarlyStopping(monitor='loss', min_delta=0, patience = 20)

    
    print("---------------------------------------------------")
    print("Start training!")
    print("---------------------------------------------------")

    # Fit Model
    batch_size = cfg["training"].get("batch_size", 2)
    epochs = cfg["training"].get("epoch", 100)

    train_history = model.fit([X_train, decoder_input_array_train],
                            mel_spectro_data_array_train,
                            epochs=epochs, batch_size=batch_size, shuffle=False,
                            verbose=1,
                            validation_data=([X_dev, decoder_input_array_dev], mel_spectro_data_array_dev),
                            callbacks = [first_callback, best_callback, reduceLR]) #total 3 callbacks
    
    print("---------------------------------------------------")
    print("Finish Training! Save the last model and prediction.")
    print("---------------------------------------------------")

    # Save the last Model(100 epoch) and prediction
    model.save(model_path + 'model.h5')
    make_predict(cfg, model, X_dev, y_dev, decoder_input_array_dev,
                 output_file, output_gloss, output_skels, result_path, epochs, best=False)
    
    print("---------------------------------------------------")
    print("Congrats! All works well~!")
    print("---------------------------------------------------")
Example #9
def load():
    data.load_data(db)
    return "ok"
Example #10
File: rf_mnist.py Project: zobaed11/athena
    print('Reshaping...', data.shape, 'to', reshaped.shape)
    return reshaped

if __name__ == '__main__':

    training_params = {
        'model': 'rf',
        'dataset': DATA.mnist,
        'n_estimators': 100,
        'criterion': 'gini',
    }

    transformations = TRANSFORMATION.supported_types()
    # transformations = [TRANSFORMATION.clean]

    (X_train, Y_train), (X_test, Y_test) = load_data(DATA.mnist)
    print(X_train.shape, Y_train.shape)
    print(X_test.shape, Y_test.shape)

    MODEL_DIR = os.path.join(PATH.MODEL, 'rf_mnist')
    save_path = 'mnist-rf-'

    if not os.path.exists(MODEL_DIR):
        import pathlib
        print(MODEL_DIR, 'does not exist. Create it.')
        pathlib.Path(MODEL_DIR).mkdir(parents=True, exist_ok=True)

    for trans in transformations:
        save_path = 'mnist-rf-' + trans + '.rf'
        save_path = os.path.join(MODEL_DIR, save_path)
Example #11
def craft(dataset,
          gen_test=True,
          method=ATTACK.FGSM,
          trans_type=TRANSFORMATION.clean):
    print('loading original images...')

    if gen_test:
        # generate for test set
        _, (X, Y) = load_data(dataset)
        prefix = 'test'
    else:
        # generate for train set (the last 20% of the original train set)
        (X, Y), _ = load_data(dataset)
        nb_trainings = int(X.shape[0] * 0.8)
        X = X[nb_trainings:]
        Y = Y[nb_trainings:]
        prefix = 'val'
    """
    In debugging mode, crafting for 50 samples.
    """
    if MODE.DEBUG:
        X = X[:30]
        Y = Y[:30]

    X = transform(X, trans_type)
    model_name = 'model-{}-cnn-{}'.format(dataset, trans_type)

    if method == ATTACK.FGSM:
        for eps in ATTACK.get_fgsm_eps():
            print('{}: (eps={})'.format(method.upper(), eps))
            X_adv, _ = get_adversarial_examples(model_name,
                                                method,
                                                X,
                                                Y,
                                                eps=eps)

            attack_params = 'eps{}'.format(int(1000 * eps))

            reset(X, trans_type)
            reset(X_adv, trans_type)
            save_adv_examples(X_adv,
                              prefix=prefix,
                              dataset=dataset,
                              transformation=trans_type,
                              attack_method=method,
                              attack_params=attack_params)
    elif method == ATTACK.BIM:
        for ord in ATTACK.get_bim_norm():
            for nb_iter in ATTACK.get_bim_nbIter():
                for eps in ATTACK.get_bim_eps(ord):
                    print('{}: (ord={}, nb_iter={}, eps={})'.format(
                        method.upper(), ord, nb_iter, eps))
                    X_adv, _ = get_adversarial_examples(model_name,
                                                        method,
                                                        X,
                                                        Y,
                                                        ord=ord,
                                                        nb_iter=nb_iter,
                                                        eps=eps)

                    if ord == np.inf:
                        norm = 'inf'
                    else:
                        norm = ord
                    attack_params = 'ord{}_nbIter{}_eps{}'.format(
                        norm, nb_iter, int(1000 * eps))
                    reset(X, trans_type)
                    reset(X_adv, trans_type)
                    save_adv_examples(X_adv,
                                      prefix=prefix,
                                      dataset=dataset,
                                      transformation=trans_type,
                                      attack_method=method,
                                      attack_params=attack_params)
    elif method == ATTACK.DEEPFOOL:
        for order in [2]:
            for overshoot in ATTACK.get_df_overshoots(order):
                print('attack {} -- order: {}; overshoot: {}'.format(
                    method.upper(), order, overshoot))
                X_adv, _ = get_adversarial_examples(model_name,
                                                    method,
                                                    X,
                                                    Y,
                                                    ord=order,
                                                    overshoot=overshoot)

                attack_params = 'l{}_overshoot{}'.format(order, int(overshoot))
                reset(X, trans_type)
                reset(X_adv, trans_type)
                save_adv_examples(X_adv,
                                  prefix=prefix,
                                  bs_samples=X,
                                  dataset=dataset,
                                  transformation=trans_type,
                                  attack_method=method,
                                  attack_params=attack_params)

    elif method == ATTACK.CW_L2:
        binary_search_steps = 16  #9
        cw_batch_size = 2  #1
        initial_const = 1  #10

        for learning_rate in ATTACK.get_cwl2_lr():
            for max_iter in ATTACK.get_cwl2_maxIter():
                print('{}: (ord={}, max_iterations={})'.format(
                    method.upper(), 2, max_iter))
                X_adv, _ = get_adversarial_examples(
                    model_name,
                    method,
                    X,
                    Y,
                    ord=2,
                    max_iterations=max_iter,
                    binary_search_steps=binary_search_steps,
                    cw_batch_size=cw_batch_size,
                    initial_const=initial_const,
                    learning_rate=learning_rate)

                attack_params = 'lr{}_maxIter{}'.format(
                    int(learning_rate * 1000), max_iter)
                reset(X, trans_type)
                reset(X_adv, trans_type)
                save_adv_examples(X_adv,
                                  prefix=prefix,
                                  bs_samples=X,
                                  dataset=dataset,
                                  transformation=trans_type,
                                  attack_method=method,
                                  attack_params=attack_params)

    elif method == ATTACK.CW_Linf:
        initial_const = 1e-5
        # X *= 255.

        for learning_rate in ATTACK.get_cwl2_lr():
            for max_iter in ATTACK.get_cwl2_maxIter():
                print('{}: (ord={}, max_iterations={})'.format(
                    method.upper(), np.inf, max_iter))
                X_adv, _ = get_adversarial_examples(
                    model_name,
                    method,
                    X,
                    Y,
                    max_iterations=max_iter,
                    initial_const=initial_const,
                    learning_rate=learning_rate)

                attack_params = 'lr{}_maxIter{}'.format(
                    int(learning_rate * 10), max_iter)
                reset(X, trans_type)
                reset(X_adv, trans_type)
                save_adv_examples(X_adv,
                                  prefix=prefix,
                                  bs_samples=X,
                                  dataset=dataset,
                                  transformation=trans_type,
                                  attack_method=method,
                                  attack_params=attack_params)

    elif method == ATTACK.CW_L0:
        initial_const = 1e-5

        for learning_rate in ATTACK.get_cwl2_lr():
            for max_iter in ATTACK.get_cwl2_maxIter():
                print('{}: (ord={}, max_iterations={})'.format(
                    method.upper(), np.inf, max_iter))
                X_adv, _ = get_adversarial_examples(
                    model_name,
                    method,
                    X,
                    Y,
                    max_iterations=max_iter,
                    initial_const=initial_const,
                    learning_rate=learning_rate)

                attack_params = 'lr{}_maxIter{}'.format(
                    int(learning_rate * 10), max_iter)
                reset(X, trans_type)
                reset(X_adv, trans_type)
                save_adv_examples(X_adv,
                                  prefix=prefix,
                                  bs_samples=X,
                                  dataset=dataset,
                                  transformation=trans_type,
                                  attack_method=method,
                                  attack_params=attack_params)

    elif method == ATTACK.JSMA:
        for theta in ATTACK.get_jsma_theta():
            for gamma in ATTACK.get_jsma_gamma():
                print('{}: (theta={}, gamma={})'.format(
                    method.upper(), theta, gamma))
                X_adv, _ = get_adversarial_examples(model_name,
                                                    method,
                                                    X,
                                                    Y,
                                                    theta=theta,
                                                    gamma=gamma)

                attack_params = 'theta{}_gamma{}'.format(
                    int(100 * theta), int(100 * gamma))
                reset(X, trans_type)
                reset(X_adv, trans_type)
                save_adv_examples(X_adv,
                                  prefix=prefix,
                                  bs_samples=X,
                                  dataset=dataset,
                                  transformation=trans_type,
                                  attack_method=method,
                                  attack_params=attack_params)

    elif method == ATTACK.PGD:
        nb_iter = 1000
        eps_iter = 0.05  #0.01

        for eps in ATTACK.get_pgd_eps():
            # check the smaller threshold first; otherwise the eps <= 0.01 branch is unreachable
            if eps <= 0.01:
                eps_iter = 0.005
            elif eps < 0.05:
                eps_iter = 0.01
            X_adv, _ = get_adversarial_examples(model_name,
                                                method,
                                                X,
                                                Y,
                                                eps=eps,
                                                nb_iter=nb_iter,
                                                eps_iter=eps_iter)
            attack_params = 'eps{}_nbIter{}_epsIter{}'.format(
                int(1000 * eps), nb_iter, int(1000 * eps_iter))
            reset(X, trans_type)
            reset(X_adv, trans_type)
            save_adv_examples(X_adv,
                              prefix=prefix,
                              bs_samples=X,
                              dataset=dataset,
                              transformation=trans_type,
                              attack_method=method,
                              attack_params=attack_params)

    elif method == ATTACK.ONE_PIXEL:
        for pixel_counts in ATTACK.get_op_pxCnt():
            for max_iter in ATTACK.get_op_maxIter():
                for pop_size in ATTACK.get_op_popsize():
                    attack_params = {
                        'pixel_counts': pixel_counts,
                        'max_iter': max_iter,
                        'pop_size': pop_size
                    }
                    X_adv, _ = get_adversarial_examples(
                        model_name, method, X, Y, **attack_params)
                    X_adv = np.asarray(X_adv)
                    attack_params = 'pxCount{}_maxIter{}_popsize{}'.format(
                        pixel_counts, max_iter, pop_size)
                    reset(X, trans_type)
                    reset(X_adv, trans_type)
                    save_adv_examples(X_adv,
                                      prefix=prefix,
                                      bs_samples=X,
                                      dataset=dataset,
                                      transformation=trans_type,
                                      attack_method=method,
                                      attack_params=attack_params)
    elif method == ATTACK.MIM:
        for eps in ATTACK.get_mim_eps():
            for nb_iter in ATTACK.get_mim_nbIter():
                attack_params = {'eps': eps, 'nb_iter': nb_iter}

                X_adv, _ = get_adversarial_examples(model_name, method, X, Y,
                                                    **attack_params)
                attack_params = 'eps{}_nbIter{}'.format(
                    int(eps * 100), nb_iter)
                reset(X, trans_type)
                reset(X_adv, trans_type)
                save_adv_examples(X_adv,
                                  prefix=prefix,
                                  bs_samples=X,
                                  dataset=dataset,
                                  transformation=trans_type,
                                  attack_method=method,
                                  attack_params=attack_params)

    del X
    del Y
Example #12
File: toksents.py Project: ppyht2/modeling
#!/usr/bin/env python

import sys
import os
from data import data
import marshal

sent_file = sys.argv[1]
d = data.load_data(sent_file)
token_seq = data.tokenize(d)
marshal_file = os.path.splitext(sent_file)[0] + '.marshal'
marshal.dump(token_seq, open(marshal_file, 'wb'))  # marshal requires a binary-mode file
print('DONE ' + sent_file)
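
Invoked as, say, python toksents.py corpus/sentences.txt (path illustrative), the script leaves corpus/sentences.marshal next to the input; reading the token sequence back is symmetric:

import marshal

with open('corpus/sentences.marshal', 'rb') as f:  # illustrative path
    token_seq = marshal.load(f)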
Example #13
from keras.callbacks import ModelCheckpoint
from keras.layers import Convolution2D, Activation, Flatten, Dense, MaxPooling2D, Dropout
from keras.models import Sequential

from data.data import load_data

if __name__ == '__main__':

    train_data, test_data, train_labels, test_labels = load_data(
        './data/fer2013.csv')

    model = Sequential()

    model.add(
        Convolution2D(64, 3, 3, border_mode='valid', input_shape=(1, 48, 48)))
    model.add(Activation('relu'))

    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    model.add(Convolution2D(128, 5, 5))
    model.add(Activation('relu'))

    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    model.add(Convolution2D(512, 3, 3))
    model.add(Activation('relu'))

    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))
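
The snippet cuts off before the classifier head and the training call; below is a minimal completion sketch in the same old Keras-1 style, assuming load_data already one-hot encodes the labels (FER2013 has 7 emotion classes; the checkpoint file name and hyper-parameters are illustrative):

    model.add(Flatten())
    model.add(Dense(256))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(7))  # 7 emotion classes in FER2013
    model.add(Activation('softmax'))

    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    checkpoint = ModelCheckpoint('fer2013_best.h5', save_best_only=True)  # illustrative path
    model.fit(train_data, train_labels, batch_size=128, nb_epoch=30,
              validation_data=(test_data, test_labels), callbacks=[checkpoint])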
Example #14
def main():
    from data.data import load_data

    model = models.load_model('data/models/model-mnist-cnn-clean.h5')
    _, (X, Y) = load_data(DATA.mnist)
    print(model.evaluate(X, Y, verbose=1))
Example #15
def run(stem_fn,
        block_fn,
        classifier_fn,
        voting_strategy_fn,
        boosting_strategy_fn,
        training_style,
        epochs,
        batch_size,
        block_num,
        dataset_name,
        classes,
        metrics_options,
        log_dir,
        load_stem=None,
        patience=12,
        progressive_training_epochs=5):
    """
    Args:
        load_stem (str): path to the weights file for the stem
    """
    metrics.setup_log_files(log_dir, block_num, metrics_options)

    # load data
    train_gen, validate_gen, train_data_shape, validate_data_shape, label_shape, class_num = data.load_data(
        dataset_name, batch_size, classes)

    data_ph, label_ph, _, weak_logits, classifier, classification_metrics = boosted_classifier.build_model(
        stem_fn,
        block_fn,
        classifier_fn,
        block_num,
        voting_strategy_fn,
        batch_size,
        class_num,
        train_data_shape,
        label_shape,
        load_stem=load_stem)


    stem_saver = tf.train.Saver(
        tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='stem'))
    weighted_losses = boosting_strategy.calculate_boosted_losses(
        boosting_strategy_fn, weak_logits, label_ph, batch_size, class_num)

    weights_scale_ph = tf.placeholder_with_default(
        tf.ones([block_num]), [block_num])

    def feed_dict_fn(epoch):
        data, labels = next(train_gen)
        feed_dict = {data_ph: data, label_ph: labels}
        if training_style == 'progressive':
            val = np.zeros([block_num], dtype=np.float32)
            val[epoch // 2] = 1.
            val[(epoch // 2) - 1] = 0.
            feed_dict[weights_scale_ph] = val
        return feed_dict

    def validate_feed_dict_fn():
        data, labels = next(validate_gen)
        feed_dict = {data_ph: data, label_ph: labels}
        return feed_dict

    # calculate gradients
    optimizer = tf.train.AdamOptimizer()
    final_grads_and_vars, grad_metrics = boosting_strategy.calculate_boosted_gradients(
        optimizer, weighted_losses, weights_scale_ph)
    train_op = optimizer.apply_gradients(final_grads_and_vars)

    # if the voting strategy has an update fn, use it
    # I, for one, welcome our new duck typing overlords
    if hasattr(classifier.voting_strategy, 'update'):
        voting_strategy_update_op = classifier.voting_strategy.update(
            weak_logits, label_ph)
        train_op = tf.group(train_op, voting_strategy_update_op)

    print("Trainable Parameters: {}".format(
        np.sum([
            np.prod(v.get_shape().as_list()) for v in tf.trainable_variables()
        ])))

    verbose_ops_dict = classification_metrics
    if 'gradient_norms' in metrics_options:
        verbose_ops_dict.update(grad_metrics)

    # initialize session and train
    process_metrics_fn = functools.partial(
        metrics.process_metrics, log_dir=log_dir, options=metrics_options)
    early_stopping_fn = util.build_early_stopping_fn(patience=patience)
    full_metrics = util.train(
        train_op,
        epochs,
        train_steps_per_epoch=train_data_shape[0] // batch_size,
        validate_steps_per_epoch=validate_data_shape[0] // batch_size,
        verbose_ops_dict=verbose_ops_dict,
        train_feed_dict_fn=feed_dict_fn,
        validate_feed_dict_fn=validate_feed_dict_fn,
        process_metrics_fn=process_metrics_fn,
        early_stopping_fn=early_stopping_fn,
        stem_saver=stem_saver,
        stem=load_stem)

    return full_metrics
Example #16
        summary_path = "{}_{}_summary.csv".format(name, folders[-1])

    else:
        summary_path = name + "_summary.csv"

    if params.extend:
        try:
            summary_path = path.abspath(summary_path)
            df = pd.read_csv(summary_path)
        except:
            print("Could not load CSV from {}".format(summary_path))
            sys.exit(1)
        df = clean_data(df)
    else:
        data_path = path.abspath(params.data_path)
        df = load_data(data_path)
        df["summary_cluster"] = [""] * len(df)
        df["summary_textrank"] = [""] * len(df)

    print("Starting Summarization of Articles")
    print("Summary File Location: {}".format(summary_path))
    for index, row in tqdm(df.iterrows(), total=len(df)):
        if row.summary_cluster == "" or row.summary_textrank == "":
            article = row.text

            if len(transformer.tokenizer(article)["input_ids"]) <= 510:
                summary_cluster = article
                summary_textrank = article
            else:
                summary_cluster = summarize(article=article,
                                            cluster_alg="hdbscan",
Example #17
def train_model(model, dataset, model_name, need_augment=False, is_BB=False, **kwargs):
    (X_train, Y_train), _ = data.load_data(dataset)
    return train(model, X_train, Y_train, model_name, need_augment, is_BB=is_BB, **kwargs)
Example #18
File: eval.py Project: aids69/boltalko
import tensorflow as tf
import tensorlayer as tl
import numpy as np
import os
from data import data

print("=======TEST.PY IMPORTED WHAT THE F**K=======")

metadata, idx_q, idx_a = data.load_data(PATH='data/')

w2idx = metadata['w2idx']  # dict  word 2 index
idx2w = metadata['idx2w']  # list index 2 word

print("Loading vocab done:", "shapes", idx_q.shape, idx_a.shape)

emb_dim = 512
batch_size = 256
xvocab_size = yvocab_size = len(idx2w)

unk_id = w2idx['unk']  # 1
pad_id = w2idx['_']  # 0

start_id = xvocab_size
end_id = xvocab_size + 1

w2idx['start_id'] = start_id
w2idx['end_id'] = end_id
idx2w = idx2w + ['start_id', 'end_id']

xvocab_size = yvocab_size = xvocab_size + 2
w2idx['end_id']
Example #19

    print("------AUGMENT SUMMARY-------")
    print("EXPERIMENT ROOT:", args.experiment_root)
    print("MODEL CONFIGS:", args.model_configs)
    print("OUTPUT ROOT:", args.output_root)
    print('----------------------------\n')

    # ----------------------------
    # parse configurations (into a dictionary) from json file
    # ----------------------------
    model_configs = load_from_json(args.model_configs)
    model_configs["wresnet"]["dir"] = args.experiment_root + model_configs.get("wresnet").get("dir")

    # ---------------------------
    # load the targeted model
    # ---------------------------
    # In the context of the adversarially trained model,
    # we use the undefended model as adversary's target model.
    savefile = "AdvTrained-cifar100.pth"
    model_file = os.path.join(model_configs.get("wresnet").get('dir'), model_configs.get("wresnet").get("pgd_trained_cifar"))
    model, _, _ = load_model(file=model_file, model_configs=model_configs.get("wresnet"), trans_configs=None)

    (x_train, y_train), _ = load_data('cifar100')

    pgd_adv_train(model=model,
                  data=(x_train, y_train),
                  outpath=args.output_root,
                  model_name=savefile
                  )
Example #20
def main():
    args = get_parameters()

    filename(args)

    (x_train,
     y_train), (x_test,
                y_test), categories = load_data(args.data_set, args.train_size)
    no_of_batches = math.ceil(len(x_train) / args.batch_size) * args.epochs

    epoch_list = args.epoch_list
    if epoch_list[-1][1] > no_of_batches:
        raise ValueError(
            "ranges out of range of training batches, number of batches {}, out of range value {}"
            .format(no_of_batches, epoch_list[-1]))

    model = get_model_categorical(input_shape=x_train[0].shape,
                                  network_shape=args.shape,
                                  categories=categories,
                                  activation=args.activation)
    print("batches {}".format(no_of_batches))
    save_layers_callback = SaveLayers(model, x_test, epoch_list)
    model.fit(x_train,
              y_train,
              batch_size=args.batch_size,
              callbacks=[save_layers_callback],
              epochs=args.epochs,
              validation_data=(x_test, y_test),
              verbose=1)

    def compute_single(saved, dist):
        x_test_hash = hash_data(x_test)
        data_x = x_test_hash
        for _ in range(dist - 1):
            data_x = np.concatenate((data_x, x_test_hash))

        y_test_hash = hash_data(y_test)
        data_y = y_test_hash
        for _ in range(dist - 1):
            data_y = np.concatenate((data_y, y_test_hash))

        # saved data where every number is binned
        saved_bin = [[
            bin_array(layer, bins=args.bins, low=layer.min(), high=layer.max())
            for layer in epoch
        ] for epoch in saved]
        # saved data where every number is hashed
        saved_hash = [[hash_data(layer) for layer in epoch]
                      for epoch in saved_bin]

        data_t = {}
        for t in range(len(saved_hash[0])):
            data_t[t] = np.array([], dtype=np.int64)
        for epoch in range(len(saved_hash)):
            for t in range(len(saved_hash[0])):
                data_t[t] = np.concatenate([data_t[t], saved_hash[epoch][t]])
        data_t = list(data_t.values())

        h_t = np.array([entropy_of_data(t) for t in data_t])
        h_t_x = np.array([__conditional_entropy(t, data_x) for t in data_t])
        h_t_y = np.array([__conditional_entropy(t, data_y) for t in data_t])

        i_x_t = h_t - h_t_x
        i_y_t = h_t - h_t_y

        return i_x_t, i_y_t

    saved = save_layers_callback.saved_layers
    IXT, IYT = [], []
    pickle = {}
    for s, r in zip(saved, epoch_list):
        print("computing information for layers {}".format(r), end="")
        start, end = r
        dist = end - start
        ixt, iyt = compute_single(s, dist)
        print("  {} {}".format(ixt, iyt))
        pickle[start] = (ixt, iyt, [])
        IXT.append(ixt)
        IYT.append(iyt)

    path = args.dest + "/data/as_if_random/" + filename(args)
    _pickle.dump(pickle, open(path, 'wb'))
    path = args.dest + "/images/as_if_random/" + filename(args)
    plot_main(IXT, IYT, filename=path, show=True)

    return
Example #21
File: main.py Project: yanne/rideserver
        return project_as_dict(get_project())


class Libraries(Resource):
    def get(self):
        return libraries_as_dict(get_project().libraries.values())


class SearchTests(Resource):
    def get(self, tag):
        suite = get_project().suite
        return tests_as_dict(search_tests_by_tag(suite, tag))


class SearchKeywords(Resource):
    def get(self, pattern):
        return keywords_as_dict(
            search_keywords_by_pattern(get_project().get_all_keywords(), pattern)
        )


api.add_resource(Project, '/project')
api.add_resource(Libraries, '/libraries')
api.add_resource(SearchTests, '/search/tests/<tag>')
api.add_resource(SearchKeywords, '/search/keywords/<pattern>')


if __name__ == "__main__":
    load_data(sys.argv[1])
    app.run(debug=True)
Example #22
def Test(cfg: dict) -> None:
    """
    Execute test process with the base configs.
    
    :param cfg: configuration dictionary (Base.yaml)
    """

    # Load the test data
    X_test, y_test, decoder_input_array_test, mel_spectro_data_array_test = load_data(cfg=cfg, mode="test")
    print("---------------------------------------------------")
    print("Complete: Load test data")
    print("---------------------------------------------------")

    # Load preprocessing data (output_file, output_gloss, output_skels)
    path = cfg["data_path"]
    with open(path + 'out_files_test' +'.pickle', 'rb') as f:
        output_file = pickle.load(f)
    with open(path + 'out_gloss_test' +'.pickle', 'rb') as f:
        output_gloss = pickle.load(f)
    with open(path + 'out_skels_test' +'.pickle', 'rb') as f:
        output_skels = pickle.load(f)
    
    # Make test result directory
    result_path = cfg["test_result_path"]
    make_dir(result_path) # "./test_result/"
        
    save_path = result_path + "json/"
    make_dir(save_path) # "./test_result/json/"
    img_path = result_path + "img_video/"
    make_dir(img_path) # "./test_result/img_video/"
    print("---------------------------------------------------")
    print("Complete: Make test_result directories")
    print("---------------------------------------------------")
        
    # Load Model(best or recent)
    test_mode = cfg["test_mode"]
    
    if test_mode == "best":
        best_model_path = cfg["model_path"] + "best_model.h5"
        model = tf.keras.models.load_model(best_model_path) # best model load
        print("---------------------------------------------------")
        print("Complete: Load best model")
        print("---------------------------------------------------")
        # Make prediction files(json and img, video)
        make_predict(cfg, model, X_test, y_test, decoder_input_array_test,
                     output_file, output_gloss, output_skels,
                     result_path, epoch=None, best=True)
    
    elif test_mode == "recent":
        recent_model_path = cfg["model_path"] + "model.h5"
        model = tf.keras.models.load_model(recent_model_path) # most recent model load
        print("---------------------------------------------------")
        print("Complete: Load recent model")
        print("---------------------------------------------------")
        # Make prediction files(json and img, video)
        make_predict(cfg, model, X_test, y_test, decoder_input_array_test,
                     output_file, output_gloss, output_skels,
                     result_path, epoch=None, best=False)
    
    print("---------------------------------------------------")
    print("Complete: Save prediction json, img and video files")
    print("---------------------------------------------------")
Example #23
def train(dataset,
          model=None,
          trans_type=TRANSFORMATION.clean,
          save_path='cnn_mnist.h5',
          eval=True,
          **kwargs):
    """
    Train a cnn model on MNIST or Fashion-MNIST.
    :param dataset: the dataset name (MNIST or Fashion-MNIST).
    :param model: a model to train; a new one is created when None.
    :param trans_type: the transformation associated with the model.
    :param save_path: file name, including the path, to save the trained model.
    :param eval: whether to evaluate the trained model on the train/validation/test sets.
    :param kwargs: customized loss function, optimizer, etc. for cleverhans to craft AEs.
    :return: the trained model
    """
    lr = 0.001
    validation_rate = 0.2
    optimizer = kwargs.get('optimizer', keras.optimizers.Adam(lr=lr))
    loss_fn = kwargs.get('loss', keras.losses.categorical_crossentropy)
    metrics = kwargs.get('metrics', 'default')

    logger.info('optimizer: [{}].'.format(optimizer))
    logger.info('loss function: [{}].'.format(loss_fn))
    logger.info('metrics: [{}].'.format(metrics))

    (X_train, Y_train), (X_test, Y_test) = data.load_data(dataset)
    X_train = data_utils.set_channels_last(X_train)
    X_test = data_utils.set_channels_last(X_test)

    # Apply transformation (associated to the weak defending model)
    X_train = data_utils.rescale(transform(X_train, trans_type))
    X_test = data_utils.rescale(transform(X_test, trans_type))

    nb_examples, img_rows, img_cols, nb_channels = X_train.shape
    nb_train_samples = int(nb_examples * (1. - validation_rate))
    train_examples = X_train[:nb_train_samples]
    train_labels = Y_train[:nb_train_samples]
    val_examples = X_train[nb_train_samples:]
    val_labels = Y_train[nb_train_samples:]

    if model is None:
        model = create_model(input_shape=(img_rows, img_cols, nb_channels))

    # Compile model
    if metrics == 'default':
        model.compile(optimizer=optimizer, loss=loss_fn, metrics=['accuracy'])
    else:
        model.compile(optimizer=optimizer,
                      loss=loss_fn,
                      metrics=['accuracy', metrics])

    # Train model
    batch_size = kwargs.get('batch_size', 128)
    epochs = kwargs.get('epochs', 20)

    start = time.monotonic()
    history = model.fit(train_examples,
                        train_labels,
                        batch_size=batch_size,
                        epochs=epochs,
                        verbose=2,
                        validation_data=(val_examples, val_labels))
    cost = time.monotonic() - start
    logger.info('Done training. It costs {} minutes.'.format(cost / 60.))

    if eval:
        scores_train = model.evaluate(train_examples,
                                      train_labels,
                                      batch_size=128,
                                      verbose=0)
        scores_val = model.evaluate(val_examples,
                                    val_labels,
                                    batch_size=128,
                                    verbose=0)
        scores_test = model.evaluate(X_test, Y_test, batch_size=128, verbose=0)

        logger.info('Evaluation on [{} set]: {}.'.format(
            'training', scores_train))
        logger.info('Evaluation on [{} set]: {}.'.format(
            'validation', scores_val))
        logger.info('Evaluation on [{} set]: {}.'.format(
            'testing', scores_test))

    logger.info('Save the trained model to [{}].'.format(save_path))
    model.save(save_path)

    checkpoints_file = save_path.split('/')[-1].split('.')[0]
    checkpoints_file = 'checkpoints_train_' + checkpoints_file + '.csv'
    checkpoints_file = os.path.join(LOG_DIR, checkpoints_file)

    # create the log directory if it does not exist yet
    if not os.path.exists(LOG_DIR):
        os.mkdir(LOG_DIR)

    logger.info('Training checkpoints have been saved to file [{}].'.format(
        checkpoints_file))
    file.dict2csv(history.history, checkpoints_file)
    save_path = save_path.split('/')[-1].split('.')[0]
    save_path = 'hist_train_' + save_path + '.pdf'
    plot_training_history(history, save_path)

    return model
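
The keyword arguments read inside the function (optimizer, loss, metrics, batch_size, epochs) double as its tuning knobs, so a call needs little beyond the dataset name. A minimal invocation sketch (DATA.mnist and TRANSFORMATION.clean appear elsewhere on this page; the save path and hyper-parameters are illustrative):

model = train(DATA.mnist,
              trans_type=TRANSFORMATION.clean,
              save_path='models/cnn-mnist-clean.h5',
              batch_size=64,
              epochs=5)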
Example #24
#!/usr/bin/env python

import sys
import os
from data import data
import marshal

sent_file = sys.argv[1]
d = data.load_data(sent_file)
token_seq = data.tokenize(d)
marshal_file = os.path.splitext(sent_file)[0] + ".marshal"
marshal.dump(token_seq, open(marshal_file, "wb"))  # marshal requires a binary-mode file
print("DONE " + sent_file)
Example #25
    def get(self):
        return project_as_dict(get_project())


class Libraries(Resource):
    def get(self):
        return libraries_as_dict(get_project().libraries.values())


class SearchTests(Resource):
    def get(self, tag):
        suite = get_project().suite
        return tests_as_dict(search_tests_by_tag(suite, tag))


class SearchKeywords(Resource):
    def get(self, pattern):
        return keywords_as_dict(
            search_keywords_by_pattern(get_project().get_all_keywords(),
                                       pattern))


api.add_resource(Project, '/project')
api.add_resource(Libraries, '/libraries')
api.add_resource(SearchTests, '/search/tests/<tag>')
api.add_resource(SearchKeywords, '/search/keywords/<pattern>')

if __name__ == "__main__":
    load_data(sys.argv[1])
    app.run(debug=True)
Example #26
                file=model_file,
                model_configs=model_configs.get("wresnet"),
                trans_configs=None)
        else:
            model_file = os.path.join(
                model_configs.get("wresnet").get('dir'),
                model_configs.get("wresnet").get("um_file"))
            model, _, _ = load_model(
                file=model_file,
                model_configs=model_configs.get("wresnet"),
                trans_configs=None)

            # train a model first
            from data.data import load_data
            from adversarial_train import pgd_adv_train
            (x_train, y_train), _ = load_data('cifar100', channel_first=True)
            print('>>> Training the model...')

            target = pgd_adv_train(
                model=model,
                data=(x_train, y_train),
                outpath=model_configs.get("wresnet").get('dir'),
                model_name=model_configs.get("wresnet").get(
                    "pgd_trained_cifar"))

    elif args.targeted_model == 'ensemble':
        # In the context of the white-box threat model,
        # we use the ensemble as adversary's target model.
        # load weak defenses (in this example, load a tiny pool of 3 weak defenses)
        if args.selected_pool is None:
            selected_pool = "demo_pool"
Example #27
def gen_greedy(dataset,
               attacker=ATTACK.FGSM,
               attack_count=None,
               strategy=ATTACK_STRATEGY.RANDOM.value):

    config = tf.ConfigProto(intra_op_parallelism_threads=4,
                            inter_op_parallelism_threads=4)
    sess = tf.Session(config=config)
    keras.backend.set_session(sess)

    candidates = init_candidate_targets(
        'ensemble/mnist_weak_defenses_fsgm.list')

    print('...In total {} weak defenses.'.format(len(candidates)))

    prefix = 'wb'  # white-box

    if attack_count is None or attack_count <= 0:
        prefix = 'gb'  # gray-box
        attack_count = len(candidates.keys())

    X_adv = []

    _, (X, Y) = load_data(dataset=dataset)

    # craft adversarial examples in batches of 100
    batch_size = 100
    nb_samples = Y.shape[0]
    nb_iter = int(nb_samples / batch_size)

    start = time.monotonic()
    for i in range(nb_iter):
        start_idx = i * batch_size
        end_idx = min((i + 1) * batch_size, nb_samples)
        print(start_idx, end_idx)
        X_batch = X[start_idx:end_idx]
        Y_batch = Y[start_idx:end_idx]

        print('...In total {} inputs.'.format(Y.shape[0]))
        idx = 0
        for x, y in zip(X_batch, Y_batch):
            print('{}-th input...'.format(idx))

            x = np.expand_dims(x, axis=0)

            strategy = ATTACK_STRATEGY.RANDOM.value
            '''
            generate_single(sess, x, y, attacker=ATTACK.FGSM,
                        candidates=None,
                        attack_count=None,
                        max_perturb=get_perturb_upperbound(),
                        strategy=ATTACK_STRATEGY.RANDOM.value)
            '''
            start_sample = time.monotonic()
            X_adv.append(
                generate_single(sess,
                                x,
                                y,
                                attacker,
                                candidates,
                                attack_count,
                                strategy=strategy))
            end_sample = time.monotonic()
            print('({}, {})-th sample: {}\n\n'.format(
                i, idx, (end_sample - start_sample)))
            idx += 1

        save_adv_examples(np.asarray(X_adv),
                          prefix=prefix,
                          bs_samples=X_batch,
                          dataset=dataset,
                          transformation=strategy,
                          attack_method=attacker,
                          attack_params='eps100_batchsize{}_{}'.format(
                              batch_size, i))

    duration = time.monotonic() - start
    print('----------------------------------')
    print('        Summary')
    print('----------------------------------')
    print('Number of inputs:', Y.shape[0])
    print('Adversary:', attacker)
    print('Strategy:', strategy)
    print('Time cost:', duration)

    sess.close()
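
A hedged invocation sketch; DATA.mnist and ATTACK.FGSM are used elsewhere on this page, and the prefix logic above maps an explicit attack_count to the white-box ('wb') output prefix and None to the gray-box ('gb') one:

# white-box setting: attack a fixed number of the loaded weak defenses per input
gen_greedy(DATA.mnist, attacker=ATTACK.FGSM, attack_count=3)

# gray-box setting: attack_count=None falls back to every loaded weak defense
gen_greedy(DATA.mnist, attacker=ATTACK.FGSM, attack_count=None)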
Example #28
    with open(filename, 'rb') as file:
        model = pickle.load(file)

    return model
"""

if __name__ == '__main__':
    transformations = TRANSFORMATION.supported_types()

    data = {
        'dataset': DATA.mnist,
        'architecture': 'svm',
    }

    (X_train, Y_train), (X_test, Y_test) = load_data(data['dataset'])

    Y_train = np.argmax(Y_train, axis=1)
    Y_test = np.argmax(Y_test, axis=1)

    for trans in transformations:
        data['trans'] = trans

        data['train'] = (transform(X_train, trans), Y_train)
        data['test'] = (transform(X_test, trans), Y_test)

        model = train(data, training_params=default_train_params)

        filename = 'model-{}-{}-{}.pkl'.format(data['dataset'],
                                               data['architecture'],
                                               data['trans'])
Example #29
import os

from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical
from data.data import load_data

# Parse the Arguments
save_weights = "pretrained/cnn_weights.h5"
model_name = 'pretrained/model.h5'
Saved_Weights_Path = None

if not os.path.exists('pretrained'):
    os.makedirs('pretrained')

# Read/Download MNIST Dataset
print('Loading Dataset...')

X_train, Y_train, X_test, Y_test = load_data()
# Alias the already-split arrays with more descriptive names.
train_img, train_labels, test_img, test_labels = X_train, Y_train, X_test, Y_test

# Each image is a 28x28 pixel matrix.
img_rows, img_columns = 28, 28

# Transform training and testing labels into one-hot vectors.
# The classes are the digits 0-9 plus the symbols +, - and *, i.e. 13 classes in total.

total_classes = 13  # 0 to 9 labels plus the symbols +, -, *

# each label is first mapped to an integer class index, then one-hot encoded below

encoder = LabelEncoder()
tra = encoder.fit_transform(Y_train)
train_labels = to_categorical(tra)
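
The snippet stops after encoding the training labels; the test labels would presumably be encoded with the same fitted encoder, and the images reshaped for a Keras CNN. A sketch under that assumption (not part of the original snippet):

tes = encoder.transform(Y_test)
test_labels = to_categorical(tes, num_classes=total_classes)

# reshape grayscale images to (samples, rows, cols, 1) and scale pixel values to [0, 1]
train_img = train_img.reshape(-1, img_rows, img_columns, 1).astype('float32') / 255.0
test_img = test_img.reshape(-1, img_rows, img_columns, 1).astype('float32') / 255.0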