def train_composition(dataset, transformation_list):
    """
    Train a model on a dataset to which a sequence of transformations has been applied.
    :param dataset: the original dataset
    :param transformation_list: the sequence of transformations
    :return: None
    """
    # Apply the sequence of transformations
    (X_train, Y_train), (X_test, Y_test) = load_data(dataset)
    X_train = transform(X_train, transformation_list)
    nb_examples, img_rows, img_cols, nb_channels = X_train.shape
    nb_classes = Y_train.shape[1]
    input_shape = (img_rows, img_cols, nb_channels)

    # Train a model and save it
    model_name = 'model-{}-cnn-{}'.format(dataset, 'composition')
    require_preprocess = (dataset == DATA.cifar_10)

    model = models.create_model(dataset, input_shape, nb_classes)
    models.train(model, X_train, Y_train, model_name, require_preprocess)
    # save to disk
    models.save_model(model, model_name)

    # evaluate the new model
    loaded_model = models.load_model(model_name)
    X_test = transform(X_test, transformation_list)
    if require_preprocess:
        X_test = normalize(X_test)
    scores = loaded_model.evaluate(X_test, Y_test, verbose=2)
    print('*** Evaluating the new model: {}'.format(scores))
    del loaded_model
def train_model(dataset, transform_type):
    """
    Train a specific model on the given dataset.
    :param dataset: the dataset to train on
    :param transform_type: transformation associated with the model
    """
    print('Training model ({}) on {}...'.format(transform_type, dataset))
    (X_train, Y_train), (X_test, Y_test) = load_data(dataset)

    nb_examples, img_rows, img_cols, nb_channels = X_train.shape
    nb_classes = Y_train.shape[1]
    input_shape = (img_rows, img_cols, nb_channels)

    X_train = transform(X_train, transform_type)

    model_name = 'model-{}-cnn-{}'.format(dataset, transform_type)
    require_preprocess = (dataset == DATA.cifar_10)

    # train
    model = models.create_model(dataset, input_shape, nb_classes)
    models.train(model, X_train, Y_train, model_name, require_preprocess)
    # save to disk
    models.save_model(model, model_name)

    # evaluate the new model
    X_test = transform(X_test, transform_type)
    loaded_model = models.load_model(model_name)
    scores = loaded_model.evaluate(X_test, Y_test, verbose=2)
    print('*** Evaluating the new model: {}'.format(scores))
    del loaded_model
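# --- Usage sketch (illustrative, not from the original sources) ---
# A minimal driver for the two helpers above. It assumes the same DATA and
# TRANSFORMATION constants used elsewhere in this collection (DATA.mnist,
# TRANSFORMATION.clean); any other transformation names would be project-specific.
if __name__ == '__main__':
    # train one weak defense for a single transformation
    train_model(DATA.mnist, TRANSFORMATION.clean)
    # or train one model on a whole sequence of transformations
    train_composition(DATA.mnist, [TRANSFORMATION.clean])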
def main():
    """
    USER CONTROLS
    """
    args = get_arguments()
    args, config, params, net_params = get_configs(args)

    # choose which pipeline to use
    if params['swa']:
        from pipeline_swa import train_val_pipeline_classification
    elif params['swag']:
        from pipeline_swag import train_val_pipeline_classification
    elif params['sgld'] or params['psgld']:
        from pipeline_sgld import train_val_pipeline_classification
    else:
        from pipeline import train_val_pipeline_classification

    if params['bbp']:
        from nets.molecules_graph_regression.load_bbp_net import gnn_model  # import all GNNs
    else:
        from nets.molecules_graph_regression.load_net import gnn_model  # import all GNNs

    DATASET_NAME = config['dataset']
    MODEL_NAME = config['model']

    # set seeds
    set_seed(params['seed'])
    print("Seed Number of Models: " + str(params['seed']))
    print("Data Seed Number: " + str(params['data_seed']))

    dataset = load_data(DATASET_NAME, args.num_train, args.num_val, args.num_test,
                        args.data_seed, params)

    # network parameters: add task information used by the loss
    net_params['task'] = 'classification'
    net_params['num_classes'] = dataset.num_classes
    net_params['num_atom_type'] = dataset.num_atom_type
    net_params['num_bond_type'] = dataset.num_bond_type

    out_dir = config['out_dir']
    root_ckpt_dir = out_dir + 'checkpoints/' + MODEL_NAME + "_" + DATASET_NAME + "_GPU" + \
        str(config['gpu']['id']) + "_" + time.strftime('%Hh%Mm%Ss_on_%b_%d_%Y')
    write_file_name = out_dir + 'results/result_' + MODEL_NAME + "_" + DATASET_NAME + "_GPU" + \
        str(config['gpu']['id']) + "_" + time.strftime('%Hh%Mm%Ss_on_%b_%d_%Y')
    root_output_dir = out_dir + 'outputs/outputs_' + MODEL_NAME + "_" + DATASET_NAME + "_GPU" + \
        str(config['gpu']['id']) + "_" + time.strftime('%Hh%Mm%Ss_on_%b_%d_%Y')

    dirs = root_ckpt_dir, write_file_name, root_output_dir
    dirs = add_dir_name(dirs, MODEL_NAME, config, params, net_params)

    net_params['total_param'] = view_model_param(MODEL_NAME, net_params)
    train_val_pipeline_classification(MODEL_NAME, DATASET_NAME, dataset, config, params,
                                      net_params, dirs)
def main(config):
    # Initialize the network
    net = Network(config)
    data = {}

    if config.run_mode == 'train':
        data['train'] = load_data(config, 'train')
        net.train(data)

    if config.run_mode == 'test':
        data['test'] = load_data(config, 'test')
        net.test(data)

    return 0
def main():
    args = get_parameters()
    (x_train, y_train), (x_test, y_test), categories = load_data(args.data_set, args.train_size)
    x_full = np.concatenate((x_train, x_test))
    y_full = np.concatenate((y_train, y_test))

    if ',' not in args.mi_estimator:
        information_calculator = get_information_calculator(x_full, y_full, args.mi_estimator, args.bins)
        processor = InformationProcessorDeltaApprox(information_calculator)
    else:
        mies = args.mi_estimator.split(',')
        calculators = [get_information_calculator(x_full, y_full, mie, args.bins) for mie in mies]
        ips = [InformationProcessorDeltaApprox(calc) for calc in calculators]
        processor = InformationProcessorUnion(ips)

    model = get_model_categorical(input_shape=x_train[0].shape,
                                  network_shape=args.shape,
                                  categories=categories,
                                  activation=args.activation)

    print("Training and Calculating mutual information")
    batch_size = min(args.batch_size, len(x_train)) if args.batch_size > 0 else len(x_train)
    no_of_batches = math.ceil(len(x_train) / batch_size) * args.epochs
    information_callback = CalculateInformationCallback(model, processor, x_full)
    model.fit(x_train, y_train,
              batch_size=batch_size,
              callbacks=[information_callback, ProgressBarCallback(no_of_batches)],
              epochs=args.epochs,
              validation_data=(x_test, y_test),
              verbose=0)

    append = ",b-" + str(information_callback.batch)
    print("Saving data to files")
    processor.save(args.dest + "/data/" + filename(args) + append)
    print("Producing image")
    processor.plot(args.dest + "/images/" + filename(args) + append)
    print("Done")
    return
def main():
    DATA.set_current_dataset_name(DATA.mnist)
    # trans_types = TRANSFORMATION.supported_types()
    trans_types = [TRANSFORMATION.clean]
    adversary_types = ATTACK.get_AETypes()

    _, (X, Y) = load_data(DATA.CUR_DATASET_NAME)

    for transformation_type in trans_types:
        TRANSFORMATION.set_cur_transformation_type(transformation_type)
        try:
            # step 1. get a model.
            # case 1. train a new model
            model = train_model((X, Y), transformation_type)
            # or case 2. load an existing model
            # model = models.load_model('model-{}-cnn-{}'.format(DATA.CUR_DATASET_NAME,
            #                                                    transformation_type))

            # step 2. evaluate the model against each adversary
            for adversary in adversary_types:
                X_adv_file = 'test_AE-{}-cnn-clean-{}.npy'.format(DATA.CUR_DATASET_NAME, adversary)
                print('Evaluating weak defenses on dataset [{}]'.format(X_adv_file))
                X_adv_file = os.path.join(PATH.ADVERSARIAL_FILE, X_adv_file)
                X_adv = np.load(X_adv_file)

                test_model(model, copy.deepcopy((X_adv, Y)), transformation_type)
                del X_adv, X_adv_file
            print('')
        except (FileNotFoundError, OSError) as e:
            print(e)
            print('')
            continue

        del model
from models import APPNP, GAT, GCN, GFNN, MaskedGCN, MixHop, PPNP, SGC
from data.data import load_data
from train import Trainer
from utils import preprocess_features

import random
import numpy as np
import torch

SEED = 18
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

if __name__ == '__main__':
    data = load_data('cora')
    data.features = preprocess_features(data.features)
    model = GCN(data)
    trainer = Trainer(model, data,
                      lr=0.01, weight_decay=5e-4,
                      epochs=200, patience=10, niter=10, verbose=True)
    trainer.run()
def Train(cfg: dict) -> None:
    """
    Execute the training process with the base configs.
    :param cfg: configuration dictionary (Base.yaml)
    """
    # Load train and dev data
    X_train, y_train, decoder_input_array_train, mel_spectro_data_array_train, max_X, vocab_size_source = \
        load_data(cfg=cfg, mode="train")
    X_dev, y_dev, decoder_input_array_dev, mel_spectro_data_array_dev = load_data(cfg=cfg, mode="dev")
    print("---------------------------------------------------")
    print("Complete: Load train, dev data")
    print("---------------------------------------------------")

    # Make result directories
    model_path = cfg["model_path"]
    make_dir(model_path)    # "./Models/"
    result_path = cfg["result_path"]
    make_dir(result_path)   # "./Models/result/"
    print("---------------------------------------------------")
    print("Complete: Make result directories")
    print("---------------------------------------------------")

    # Save the real (ground-truth) json, img and video files before training
    json_path = result_path + "json/"
    make_dir(json_path)     # "./Models/result/json/"
    img_path = result_path + "img_video/"
    make_dir(img_path)      # "./Models/result/img_video/"

    data_path = cfg["data_path"]
    with open(data_path + 'out_files_dev' + '.pickle', 'rb') as f:
        output_file = pickle.load(f)
    with open(data_path + 'out_gloss_dev' + '.pickle', 'rb') as f:
        output_gloss = pickle.load(f)
    with open(data_path + 'out_skels_dev' + '.pickle', 'rb') as f:
        output_skels = pickle.load(f)

    real_json_path = json_path + 'real/'
    make_dir(real_json_path)
    real_img_path = img_path + 'real/'
    make_dir(real_img_path)

    for i in range(len(X_dev)):
        leng = output_skels[i]
        real = y_dev[i].tolist()[:leng]
        filename = str(output_file[i]) + '_' + str(output_gloss[i]) + '_real' + '.json'
        with open(real_json_path + filename, 'w', encoding='utf-8') as make_file:
            json.dump(real, make_file, indent="\t")
        # make img & video
        create_img_video(real_json_path, real_img_path, filename)
    print("---------------------------------------------------")
    print("Complete: Save real json, img and video files")
    print("---------------------------------------------------")

    # Build the tacotron model
    model = build_model(cfg=cfg, max_X=max_X, vocab_size_source=vocab_size_source)
    print("---------------------------------------------------")
    print("Complete: Build model")
    print("---------------------------------------------------")

    # Set optimizer (Adam) and loss (MSE)
    opt = Adam()
    model.compile(optimizer=opt,
                  loss=['mean_squared_error', 'mean_squared_error'])  # original was 'mean_absolute_error'

    # Set callback options
    ### callback 1: customized callback (save model and make prediction every 1000 epochs)
    first_callback = MyCallback('save_jsonfile', cfg, X_dev, y_dev, decoder_input_array_dev,
                                output_file, output_gloss, output_skels)

    ### callback 2: best-model checkpoint (update best_model.h5 every 10 epochs)
    best_path = model_path + "best_model.h5"
    best_callback = tf.keras.callbacks.ModelCheckpoint(filepath=best_path,
                                                       monitor='val_loss',
                                                       save_best_only=True,
                                                       save_weights_only=False,
                                                       verbose=1,
                                                       period=10)

    ### callback 3: learning-rate scheduler (reduce LR by 20% when val_loss has not improved for `patience` epochs)
    patience = cfg["training"].get("patience", 10)
    decrease_factor = cfg["training"].get("decrease_factor", 0.2)
    min_LR = cfg["training"].get("min_LR", 0.00001)
    reduceLR = ReduceLROnPlateau(monitor='val_loss',
                                 factor=decrease_factor,
                                 patience=patience,
                                 min_lr=min_LR)

    ### (optional callback) 1. early stopping
    # early_stopping = tf.keras.callbacks.EarlyStopping(monitor='loss', min_delta=0, patience=20)

    print("---------------------------------------------------")
    print("Start training!")
    print("---------------------------------------------------")

    # Fit the model
    batch_size = cfg["training"].get("batch_size", 2)
    epochs = cfg["training"].get("epoch", 100)
    train_history = model.fit([X_train, decoder_input_array_train],
                              mel_spectro_data_array_train,
                              epochs=epochs,
                              batch_size=batch_size,
                              shuffle=False,
                              verbose=1,
                              validation_data=([X_dev, decoder_input_array_dev],
                                               mel_spectro_data_array_dev),
                              callbacks=[first_callback, best_callback, reduceLR])  # 3 callbacks in total

    print("---------------------------------------------------")
    print("Finish Training! Save the last model and prediction.")
    print("---------------------------------------------------")

    # Save the last model (final epoch) and its prediction
    model.save(model_path + 'model.h5')
    make_predict(cfg, model, X_dev, y_dev, decoder_input_array_dev, output_file, output_gloss,
                 output_skels, result_path, epochs, best=False)
    print("---------------------------------------------------")
    print("Congrats! All works well~!")
    print("---------------------------------------------------")
def load():
    data.load_data(db)
    return "ok"
    print('Reshaping...', data.shape, 'to', reshaped.shape)
    return reshaped


if __name__ == '__main__':
    training_params = {
        'model': 'rf',
        'dataset': DATA.mnist,
        'n_estimators': 100,
        'criterion': 'gini',
    }

    transformations = TRANSFORMATION.supported_types()
    # transformations = [TRANSFORMATION.clean]

    (X_train, Y_train), (X_test, Y_test) = load_data(DATA.mnist)
    print(X_train.shape, Y_train.shape)
    print(X_test.shape, Y_test.shape)

    MODEL_DIR = os.path.join(PATH.MODEL, 'rf_mnist')
    save_path = 'mnist-rf-'

    if not os.path.exists(MODEL_DIR):
        import pathlib
        print(MODEL_DIR, 'does not exist. Create it.')
        pathlib.Path(MODEL_DIR).mkdir(parents=True, exist_ok=True)

    for trans in transformations:
        save_path = 'mnist-rf-' + trans + '.rf'
        save_path = os.path.join(MODEL_DIR, save_path)
def craft(dataset, gen_test=True, method=ATTACK.FGSM, trans_type=TRANSFORMATION.clean):
    print('loading original images...')

    if gen_test:
        # generate for the test set
        _, (X, Y) = load_data(dataset)
        prefix = 'test'
    else:
        # generate for the train set (the last 20% of the original train set)
        (X, Y), _ = load_data(dataset)
        nb_trainings = int(X.shape[0] * 0.8)
        X = X[nb_trainings:]
        Y = Y[nb_trainings:]
        prefix = 'val'

    # In debugging mode, craft AEs for the first 30 samples only.
    if MODE.DEBUG:
        X = X[:30]
        Y = Y[:30]

    X = transform(X, trans_type)
    model_name = 'model-{}-cnn-{}'.format(dataset, trans_type)

    if method == ATTACK.FGSM:
        for eps in ATTACK.get_fgsm_eps():
            print('{}: (eps={})'.format(method.upper(), eps))
            X_adv, _ = get_adversarial_examples(model_name, method, X, Y, eps=eps)

            attack_params = 'eps{}'.format(int(1000 * eps))
            reset(X, trans_type)
            reset(X_adv, trans_type)
            save_adv_examples(X_adv, prefix=prefix, dataset=dataset,
                              transformation=trans_type, attack_method=method,
                              attack_params=attack_params)

    elif method == ATTACK.BIM:
        for ord in ATTACK.get_bim_norm():
            for nb_iter in ATTACK.get_bim_nbIter():
                for eps in ATTACK.get_bim_eps(ord):
                    print('{}: (ord={}, nb_iter={}, eps={})'.format(method.upper(), ord, nb_iter, eps))
                    X_adv, _ = get_adversarial_examples(model_name, method, X, Y,
                                                        ord=ord, nb_iter=nb_iter, eps=eps)

                    norm = 'inf' if ord == np.inf else ord
                    attack_params = 'ord{}_nbIter{}_eps{}'.format(norm, nb_iter, int(1000 * eps))
                    reset(X, trans_type)
                    reset(X_adv, trans_type)
                    save_adv_examples(X_adv, prefix=prefix, dataset=dataset,
                                      transformation=trans_type, attack_method=method,
                                      attack_params=attack_params)

    elif method == ATTACK.DEEPFOOL:
        for order in [2]:
            for overshoot in ATTACK.get_df_overshoots(order):
                print('attack {} -- order: {}; overshoot: {}'.format(method.upper(), order, overshoot))
                X_adv, _ = get_adversarial_examples(model_name, method, X, Y,
                                                    ord=order, overshoot=overshoot)

                attack_params = 'l{}_overshoot{}'.format(order, int(overshoot))
                reset(X, trans_type)
                reset(X_adv, trans_type)
                save_adv_examples(X_adv, prefix=prefix, bs_samples=X, dataset=dataset,
                                  transformation=trans_type, attack_method=method,
                                  attack_params=attack_params)

    elif method == ATTACK.CW_L2:
        binary_search_steps = 16  # 9
        cw_batch_size = 2         # 1
        initial_const = 1         # 10

        for learning_rate in ATTACK.get_cwl2_lr():
            for max_iter in ATTACK.get_cwl2_maxIter():
                print('{}: (ord={}, max_iterations={})'.format(method.upper(), 2, max_iter))
                X_adv, _ = get_adversarial_examples(model_name, method, X, Y, ord=2,
                                                    max_iterations=max_iter,
                                                    binary_search_steps=binary_search_steps,
                                                    cw_batch_size=cw_batch_size,
                                                    initial_const=initial_const,
                                                    learning_rate=learning_rate)

                attack_params = 'lr{}_maxIter{}'.format(int(learning_rate * 1000), max_iter)
                reset(X, trans_type)
                reset(X_adv, trans_type)
                save_adv_examples(X_adv, prefix=prefix, bs_samples=X, dataset=dataset,
                                  transformation=trans_type, attack_method=method,
                                  attack_params=attack_params)

    elif method == ATTACK.CW_Linf:
        initial_const = 1e-5
        # X *= 255.
        for learning_rate in ATTACK.get_cwl2_lr():
            for max_iter in ATTACK.get_cwl2_maxIter():
                print('{}: (ord={}, max_iterations={})'.format(method.upper(), np.inf, max_iter))
                X_adv, _ = get_adversarial_examples(model_name, method, X, Y,
                                                    max_iterations=max_iter,
                                                    initial_const=initial_const,
                                                    learning_rate=learning_rate)

                attack_params = 'lr{}_maxIter{}'.format(int(learning_rate * 10), max_iter)
                reset(X, trans_type)
                reset(X_adv, trans_type)
                save_adv_examples(X_adv, prefix=prefix, bs_samples=X, dataset=dataset,
                                  transformation=trans_type, attack_method=method,
                                  attack_params=attack_params)

    elif method == ATTACK.CW_L0:
        initial_const = 1e-5
        for learning_rate in ATTACK.get_cwl2_lr():
            for max_iter in ATTACK.get_cwl2_maxIter():
                print('{}: (ord={}, max_iterations={})'.format(method.upper(), np.inf, max_iter))
                X_adv, _ = get_adversarial_examples(model_name, method, X, Y,
                                                    max_iterations=max_iter,
                                                    initial_const=initial_const,
                                                    learning_rate=learning_rate)

                attack_params = 'lr{}_maxIter{}'.format(int(learning_rate * 10), max_iter)
                reset(X, trans_type)
                reset(X_adv, trans_type)
                save_adv_examples(X_adv, prefix=prefix, bs_samples=X, dataset=dataset,
                                  transformation=trans_type, attack_method=method,
                                  attack_params=attack_params)

    elif method == ATTACK.JSMA:
        for theta in ATTACK.get_jsma_theta():
            for gamma in ATTACK.get_jsma_gamma():
                print('{}: (theta={}, gamma={})'.format(method.upper(), theta, gamma))
                X_adv, _ = get_adversarial_examples(model_name, method, X, Y,
                                                    theta=theta, gamma=gamma)

                attack_params = 'theta{}_gamma{}'.format(int(100 * theta), int(100 * gamma))
                reset(X, trans_type)
                reset(X_adv, trans_type)
                save_adv_examples(X_adv, prefix=prefix, bs_samples=X, dataset=dataset,
                                  transformation=trans_type, attack_method=method,
                                  attack_params=attack_params)

    elif method == ATTACK.PGD:
        nb_iter = 1000
        eps_iter = 0.05  # 0.01
        for eps in ATTACK.get_pgd_eps():
            # check the stricter bound first so that both step sizes are reachable
            if eps <= 0.01:
                eps_iter = 0.005
            elif eps < 0.05:
                eps_iter = 0.01
            X_adv, _ = get_adversarial_examples(model_name, method, X, Y,
                                                eps=eps, nb_iter=nb_iter, eps_iter=eps_iter)

            attack_params = 'eps{}_nbIter{}_epsIter{}'.format(int(1000 * eps), nb_iter,
                                                              int(1000 * eps_iter))
            reset(X, trans_type)
            reset(X_adv, trans_type)
            save_adv_examples(X_adv, prefix=prefix, bs_samples=X, dataset=dataset,
                              transformation=trans_type, attack_method=method,
                              attack_params=attack_params)

    elif method == ATTACK.ONE_PIXEL:
        for pixel_counts in ATTACK.get_op_pxCnt():
            for max_iter in ATTACK.get_op_maxIter():
                for pop_size in ATTACK.get_op_popsize():
                    attack_params = {
                        'pixel_counts': pixel_counts,
                        'max_iter': max_iter,
                        'pop_size': pop_size,
                    }
                    X_adv, _ = get_adversarial_examples(model_name, method, X, Y, **attack_params)
                    X_adv = np.asarray(X_adv)

                    attack_params = 'pxCount{}_maxIter{}_popsize{}'.format(pixel_counts, max_iter, pop_size)
                    reset(X, trans_type)
                    reset(X_adv, trans_type)
                    save_adv_examples(X_adv, prefix=prefix, bs_samples=X, dataset=dataset,
                                      transformation=trans_type, attack_method=method,
                                      attack_params=attack_params)

    elif method == ATTACK.MIM:
        for eps in ATTACK.get_mim_eps():
            for nb_iter in ATTACK.get_mim_nbIter():
                attack_params = {'eps': eps, 'nb_iter': nb_iter}
                X_adv, _ = get_adversarial_examples(model_name, method, X, Y, **attack_params)

                attack_params = 'eps{}_nbIter{}'.format(int(eps * 100), nb_iter)
                reset(X, trans_type)
                reset(X_adv, trans_type)
                save_adv_examples(X_adv, prefix=prefix, bs_samples=X, dataset=dataset,
                                  transformation=trans_type, attack_method=method,
                                  attack_params=attack_params)

    del X
    del Y
#!/usr/bin/env python
import sys
import os
import marshal

from data import data

sent_file = sys.argv[1]
d = data.load_data(sent_file)
token_seq = data.tokenize(d)

marshal_file = os.path.splitext(sent_file)[0] + '.marshal'
# marshal.dump requires a binary file handle
with open(marshal_file, 'wb') as f:
    marshal.dump(token_seq, f)
print('DONE ' + sent_file)
from keras.callbacks import ModelCheckpoint
from keras.layers import Convolution2D, Activation, Flatten, Dense, MaxPooling2D, Dropout
from keras.models import Sequential

from data.data import load_data

if __name__ == '__main__':
    train_data, test_data, train_labels, test_labels = load_data('./data/fer2013.csv')

    model = Sequential()
    model.add(Convolution2D(64, 3, 3, border_mode='valid', input_shape=(1, 48, 48)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    model.add(Convolution2D(128, 5, 5))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    model.add(Convolution2D(512, 3, 3))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))
def main():
    from data.data import load_data

    model = models.load_model('data/models/model-mnist-cnn-clean.h5')
    _, (X, Y) = load_data(DATA.mnist)
    print(model.evaluate(X, Y, verbose=1))
def run(stem_fn, block_fn, classifier_fn, voting_strategy_fn, boosting_strategy_fn,
        training_style, epochs, batch_size, block_num, dataset_name, classes,
        metrics_options, log_dir, load_stem=None, patience=12, progressive_training_epochs=5):
    """
    Args:
        load_stem (str): path to the weights file for the stem
    """
    metrics.setup_log_files(log_dir, block_num, metrics_options)

    # load data
    train_gen, validate_gen, train_data_shape, validate_data_shape, label_shape, class_num = \
        data.load_data(dataset_name, batch_size, classes)

    data_ph, label_ph, _, weak_logits, classifier, classification_metrics = \
        boosted_classifier.build_model(stem_fn, block_fn, classifier_fn, block_num,
                                       voting_strategy_fn, batch_size, class_num,
                                       train_data_shape, label_shape, load_stem=load_stem)

    stem_saver = tf.train.Saver(tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='stem'))

    weighted_losses = boosting_strategy.calculate_boosted_losses(
        boosting_strategy_fn, weak_logits, label_ph, batch_size, class_num)

    weights_scale_ph = tf.placeholder_with_default(tf.ones([block_num]), [block_num])

    def feed_dict_fn(epoch):
        data, labels = next(train_gen)
        feed_dict = {data_ph: data, label_ph: labels}
        if training_style == 'progressive':
            val = np.zeros([block_num], dtype=np.float32)
            val[epoch // 2] = 1.
            val[(epoch // 2) - 1] = 0.
            feed_dict[weights_scale_ph] = val
        return feed_dict

    def validate_feed_dict_fn():
        data, labels = next(validate_gen)
        feed_dict = {data_ph: data, label_ph: labels}
        return feed_dict

    # calculate gradients
    optimizer = tf.train.AdamOptimizer()
    final_grads_and_vars, grad_metrics = boosting_strategy.calculate_boosted_gradients(
        optimizer, weighted_losses, weights_scale_ph)
    train_op = optimizer.apply_gradients(final_grads_and_vars)

    # if the voting strategy has an update fn, use it
    # I, for one, welcome our new duck typing overlords
    if hasattr(classifier.voting_strategy, 'update'):
        voting_strategy_update_op = classifier.voting_strategy.update(weak_logits, label_ph)
        train_op = tf.group(train_op, voting_strategy_update_op)

    print("Trainable Parameters: {}".format(
        np.sum([np.prod(v.get_shape().as_list()) for v in tf.trainable_variables()])))

    verbose_ops_dict = classification_metrics
    if 'gradient_norms' in metrics_options:
        verbose_ops_dict.update(grad_metrics)

    # initialize session and train
    process_metrics_fn = functools.partial(metrics.process_metrics, log_dir=log_dir,
                                           options=metrics_options)
    early_stopping_fn = util.build_early_stopping_fn(patience=patience)

    full_metrics = util.train(train_op,
                              epochs,
                              train_steps_per_epoch=train_data_shape[0] // batch_size,
                              validate_steps_per_epoch=validate_data_shape[0] // batch_size,
                              verbose_ops_dict=verbose_ops_dict,
                              train_feed_dict_fn=feed_dict_fn,
                              validate_feed_dict_fn=validate_feed_dict_fn,
                              process_metrics_fn=process_metrics_fn,
                              early_stopping_fn=early_stopping_fn,
                              stem_saver=stem_saver,
                              stem=load_stem)

    return full_metrics
        summary_path = "{}_{}_summary.csv".format(name, folders[-1])
    else:
        summary_path = name + "_summary.csv"

    if params.extend:
        try:
            summary_path = path.abspath(summary_path)
            df = pd.read_csv(summary_path)
        except:
            print("Could not load CSV from {}".format(summary_path))
            sys.exit(1)
        df = clean_data(df)
    else:
        data_path = path.abspath(params.data_path)
        df = load_data(data_path)
        df["summary_cluster"] = [""] * len(df)
        df["summary_textrank"] = [""] * len(df)

    print("Starting Summarization of Articles")
    print("Summary File Location: {}".format(summary_path))

    for index, row in tqdm(df.iterrows(), total=len(df)):
        if row.summary_cluster == "" or row.summary_textrank == "":
            article = row.text
            if len(transformer.tokenizer(article)["input_ids"]) <= 510:
                summary_cluster = article
                summary_textrank = article
            else:
                summary_cluster = summarize(article=article,
                                            cluster_alg="hdbscan",
def train_model(model, dataset, model_name, need_augment=False, is_BB=False, **kwargs):
    (X_train, Y_train), _ = data.load_data(dataset)
    return train(model, X_train, Y_train, model_name, need_augment, is_BB=is_BB, **kwargs)
import tensorflow as tf
import tensorlayer as tl
import numpy as np
import os

from data import data

print("=======TEST.PY IMPORTED WHAT THE F**K=======")

metadata, idx_q, idx_a = data.load_data(PATH='data/')

w2idx = metadata['w2idx']   # dict: word -> index
idx2w = metadata['idx2w']   # list: index -> word

print("Loading vocab done:", "shapes", idx_q.shape, idx_a.shape)

emb_dim = 512
batch_size = 256
xvocab_size = yvocab_size = len(idx2w)

unk_id = w2idx['unk']   # 1
pad_id = w2idx['_']     # 0

start_id = xvocab_size
end_id = xvocab_size + 1

w2idx['start_id'] = start_id
w2idx['end_id'] = end_id
idx2w = idx2w + ['start_id', 'end_id']

xvocab_size = yvocab_size = xvocab_size + 2
w2idx['end_id']
print("------AUGMENT SUMMARY-------") print("EXPERIMENT ROOT:", args.experiment_root) print("MODEL CONFIGS:", args.model_configs) print("OUTPUT ROOT:", args.output_root) print('----------------------------\n') # ---------------------------- # parse configurations (into a dictionary) from json file # ---------------------------- model_configs = load_from_json(args.model_configs) model_configs["wresnet"]["dir"] = args.experiment_root + model_configs.get("wresnet").get("dir") # --------------------------- # load the targeted model # --------------------------- # In the context of the adversarially trained model, # we use the undefended model as adversary's target model. savefile = "AdvTrained-cifar100.pth" model_file = os.path.join(model_configs.get("wresnet").get('dir'), model_configs.get("wresnet").get("pgd_trained_cifar")) model, _, _ = load_model(file=model_file, model_configs=model_configs.get("wresnet"), trans_configs=None) (x_train, y_train), _ = load_data('cifar100') pgd_adv_train(model=model, data=(x_train, y_train), outpath=args.output_root, model_name=savefile )
def main():
    args = get_parameters()
    filename(args)
    (x_train, y_train), (x_test, y_test), categories = load_data(args.data_set, args.train_size)

    no_of_batches = math.ceil(len(x_train) / args.batch_size) * args.epochs
    epoch_list = args.epoch_list
    if epoch_list[-1][1] > no_of_batches:
        raise ValueError(
            "ranges out of range of training batches, number of batches {}, out of range value {}"
            .format(no_of_batches, epoch_list[-1]))

    model = get_model_categorical(input_shape=x_train[0].shape,
                                  network_shape=args.shape,
                                  categories=categories,
                                  activation=args.activation)

    print("batches {}".format(no_of_batches))
    save_layers_callback = SaveLayers(model, x_test, epoch_list)
    model.fit(x_train, y_train,
              batch_size=args.batch_size,
              callbacks=[save_layers_callback],
              epochs=args.epochs,
              validation_data=(x_test, y_test),
              verbose=1)

    def compute_single(saved, dist):
        x_test_hash = hash_data(x_test)
        data_x = x_test_hash
        for _ in range(dist - 1):
            data_x = np.concatenate((data_x, x_test_hash))

        y_test_hash = hash_data(y_test)
        data_y = y_test_hash
        for _ in range(dist - 1):
            data_y = np.concatenate((data_y, y_test_hash))

        # saved data where every number is binned
        saved_bin = [[bin_array(layer, bins=args.bins, low=layer.min(), high=layer.max())
                      for layer in epoch] for epoch in saved]
        # saved data where every number is hashed
        saved_hash = [[hash_data(layer) for layer in epoch] for epoch in saved_bin]

        data_t = {}
        for t in range(len(saved_hash[0])):
            data_t[t] = np.array([], dtype=np.int64)
        for epoch in range(len(saved_hash)):
            for t in range(len(saved_hash[0])):
                data_t[t] = np.concatenate([data_t[t], saved_hash[epoch][t]])
        data_t = list(data_t.values())

        h_t = np.array([entropy_of_data(t) for t in data_t])
        h_t_x = np.array([__conditional_entropy(t, data_x) for t in data_t])
        h_t_y = np.array([__conditional_entropy(t, data_y) for t in data_t])

        i_x_t = h_t - h_t_x
        i_y_t = h_t - h_t_y
        return i_x_t, i_y_t

    saved = save_layers_callback.saved_layers
    IXT, IYT = [], []
    pickle = {}
    for s, r in zip(saved, epoch_list):
        print("computing information for layers {}".format(r), end="")
        start, end = r
        dist = end - start
        ixt, iyt = compute_single(s, dist)
        print(" {} {}".format(ixt, iyt))
        pickle[start] = (ixt, iyt, [])
        IXT.append(ixt)
        IYT.append(iyt)

    path = args.dest + "/data/as_if_random/" + filename(args)
    _pickle.dump(pickle, open(path, 'wb'))
    path = args.dest + "/images/as_if_random/" + filename(args)
    plot_main(IXT, IYT, filename=path, show=True)
    return
def Test(cfg: dict) -> None:
    """
    Execute the test process with the base configs.
    :param cfg: configuration dictionary (Base.yaml)
    """
    # Load the test data
    X_test, y_test, decoder_input_array_test, mel_spectro_data_array_test = load_data(cfg=cfg, mode="test")
    print("---------------------------------------------------")
    print("Complete: Load test data")
    print("---------------------------------------------------")

    # Load preprocessing data (output_file, output_gloss, output_skels)
    path = cfg["data_path"]
    with open(path + 'out_files_test' + '.pickle', 'rb') as f:
        output_file = pickle.load(f)
    with open(path + 'out_gloss_test' + '.pickle', 'rb') as f:
        output_gloss = pickle.load(f)
    with open(path + 'out_skels_test' + '.pickle', 'rb') as f:
        output_skels = pickle.load(f)

    # Make the test result directories
    result_path = cfg["test_result_path"]
    make_dir(result_path)            # "./test_result/"
    save_path = result_path + "json/"
    make_dir(save_path)              # "./test_result/json/"
    img_path = result_path + "img_video/"
    make_dir(img_path)               # "./test_result/img_video/"
    print("---------------------------------------------------")
    print("Complete: Make test_result directories")
    print("---------------------------------------------------")

    # Load the model (best or most recent)
    test_mode = cfg["test_mode"]
    if test_mode == "best":
        best_model_path = cfg["model_path"] + "best_model.h5"
        model = tf.keras.models.load_model(best_model_path)    # load the best model
        print("---------------------------------------------------")
        print("Complete: Load best model")
        print("---------------------------------------------------")
        # Make prediction files (json, img and video)
        make_predict(cfg, model, X_test, y_test, decoder_input_array_test, output_file,
                     output_gloss, output_skels, result_path, epoch=None, best=True)
    elif test_mode == "recent":
        recent_model_path = cfg["model_path"] + "model.h5"
        model = tf.keras.models.load_model(recent_model_path)  # load the most recent model
        print("---------------------------------------------------")
        print("Complete: Load recent model")
        print("---------------------------------------------------")
        # Make prediction files (json, img and video)
        make_predict(cfg, model, X_test, y_test, decoder_input_array_test, output_file,
                     output_gloss, output_skels, result_path, epoch=None, best=False)

    print("---------------------------------------------------")
    print("Complete: Save prediction json, img and video files")
    print("---------------------------------------------------")
def train(dataset, model=None, trans_type=TRANSFORMATION.clean, save_path='cnn_mnist.h5',
          eval=True, **kwargs):
    """
    Train a CNN model on MNIST or Fashion-MNIST.
    :param dataset: the dataset to train on.
    :param model: a model to train.
    :param trans_type: transformation associated with the model.
    :param save_path: file name, including the path, to save the trained model.
    :param eval: whether to evaluate the trained model on the train/validation/test sets.
    :param kwargs: customized loss function, optimizer, etc. for cleverhans to craft AEs.
    :return: the trained model
    """
    lr = 0.001
    validation_rate = 0.2

    optimizer = kwargs.get('optimizer', keras.optimizers.Adam(lr=lr))
    loss_fn = kwargs.get('loss', keras.losses.categorical_crossentropy)
    metrics = kwargs.get('metrics', 'default')

    logger.info('optimizer: [{}].'.format(optimizer))
    logger.info('loss function: [{}].'.format(loss_fn))
    logger.info('metrics: [{}].'.format(metrics))

    (X_train, Y_train), (X_test, Y_test) = data.load_data(dataset)
    X_train = data_utils.set_channels_last(X_train)
    X_test = data_utils.set_channels_last(X_test)

    # Apply the transformation (associated with the weak defending model)
    X_train = data_utils.rescale(transform(X_train, trans_type))
    X_test = data_utils.rescale(transform(X_test, trans_type))

    nb_examples, img_rows, img_cols, nb_channels = X_train.shape
    nb_train_samples = int(nb_examples * (1. - validation_rate))
    train_examples = X_train[:nb_train_samples]
    train_labels = Y_train[:nb_train_samples]
    val_examples = X_train[nb_train_samples:]
    val_labels = Y_train[nb_train_samples:]

    if model is None:
        model = create_model(input_shape=(img_rows, img_cols, nb_channels))

    # Compile the model
    if metrics == 'default':
        model.compile(optimizer=optimizer, loss=loss_fn, metrics=['accuracy'])
    else:
        model.compile(optimizer=optimizer, loss=loss_fn, metrics=['accuracy', metrics])

    # Train the model
    batch_size = kwargs.get('batch_size', 128)
    epochs = kwargs.get('epochs', 20)

    start = time.monotonic()
    history = model.fit(train_examples, train_labels,
                        batch_size=batch_size, epochs=epochs, verbose=2,
                        validation_data=(val_examples, val_labels))
    cost = time.monotonic() - start
    logger.info('Done training. It costs {} minutes.'.format(cost / 60.))

    if eval:
        scores_train = model.evaluate(train_examples, train_labels, batch_size=128, verbose=0)
        scores_val = model.evaluate(val_examples, val_labels, batch_size=128, verbose=0)
        scores_test = model.evaluate(X_test, Y_test, batch_size=128, verbose=0)
        logger.info('Evaluation on [{} set]: {}.'.format('training', scores_train))
        logger.info('Evaluation on [{} set]: {}.'.format('validation', scores_val))
        logger.info('Evaluation on [{} set]: {}.'.format('testing', scores_test))

    logger.info('Save the trained model to [{}].'.format(save_path))
    model.save(save_path)

    checkpoints_file = save_path.split('/')[-1].split('.')[0]
    checkpoints_file = 'checkpoints_train_' + checkpoints_file + '.csv'
    checkpoints_file = os.path.join(LOG_DIR, checkpoints_file)
    if not os.path.exists(LOG_DIR):
        os.mkdir(LOG_DIR)
    logger.info('Training checkpoints have been saved to file [{}].'.format(checkpoints_file))
    file.dict2csv(history.history, checkpoints_file)

    save_path = save_path.split('/')[-1].split('.')[0]
    save_path = 'hist_train_' + save_path + '.pdf'
    plot_training_history(history, save_path)

    return model
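# --- Usage sketch (illustrative, not from the original sources) ---
# A minimal call to train() above, relying only on its own defaults and on the
# DATA / TRANSFORMATION constants used elsewhere in this collection.
if __name__ == '__main__':
    trained = train(DATA.mnist,
                    trans_type=TRANSFORMATION.clean,
                    save_path='cnn_mnist.h5',
                    epochs=20, batch_size=128)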
class Project(Resource):
    def get(self):
        return project_as_dict(get_project())


class Libraries(Resource):
    def get(self):
        return libraries_as_dict(get_project().libraries.values())


class SearchTests(Resource):
    def get(self, tag):
        suite = get_project().suite
        return tests_as_dict(search_tests_by_tag(suite, tag))


class SearchKeywords(Resource):
    def get(self, pattern):
        return keywords_as_dict(
            search_keywords_by_pattern(get_project().get_all_keywords(), pattern))


api.add_resource(Project, '/project')
api.add_resource(Libraries, '/libraries')
api.add_resource(SearchTests, '/search/tests/<tag>')
api.add_resource(SearchKeywords, '/search/keywords/<pattern>')

if __name__ == "__main__":
    load_data(sys.argv[1])
    app.run(debug=True)
            file=model_file,
            model_configs=model_configs.get("wresnet"),
            trans_configs=None)
    else:
        model_file = os.path.join(model_configs.get("wresnet").get('dir'),
                                  model_configs.get("wresnet").get("um_file"))
        model, _, _ = load_model(file=model_file,
                                 model_configs=model_configs.get("wresnet"),
                                 trans_configs=None)

        # train a model first
        from data.data import load_data
        from adversarial_train import pgd_adv_train

        (x_train, y_train), _ = load_data('cifar100', channel_first=True)
        print('>>> Training the model...')
        target = pgd_adv_train(model=model,
                               data=(x_train, y_train),
                               outpath=model_configs.get("wresnet").get('dir'),
                               model_name=model_configs.get("wresnet").get("pgd_trained_cifar"))

elif args.targeted_model == 'ensemble':
    # In the context of the white-box threat model,
    # we use the ensemble as the adversary's target model.
    # Load weak defenses (in this example, a tiny pool of 3 weak defenses).
    if args.selected_pool is None:
        selected_pool = "demo_pool"
def gen_greedy(dataset, attacker=ATTACK.FGSM, attack_count=None,
               strategy=ATTACK_STRATEGY.RANDOM.value):
    config = tf.ConfigProto(intra_op_parallelism_threads=4, inter_op_parallelism_threads=4)
    sess = tf.Session(config=config)
    keras.backend.set_session(sess)

    candidates = init_candidate_targets('ensemble/mnist_weak_defenses_fsgm.list')
    print('...In total {} weak defenses.'.format(len(candidates)))

    prefix = 'wb'  # white-box
    if attack_count is None or attack_count <= 0:
        prefix = 'gb'  # gray-box
        attack_count = len(candidates.keys())

    X_adv = []
    _, (X, Y) = load_data(dataset=dataset)

    # generate adversarial examples in batches
    batch_size = 100
    nb_samples = Y.shape[0]
    nb_iter = int(nb_samples / batch_size)

    start = time.monotonic()
    for i in range(nb_iter):
        start_idx = i * batch_size
        end_idx = min((i + 1) * batch_size, nb_samples)
        print(start_idx, end_idx)
        X_batch = X[start_idx:end_idx]
        Y_batch = Y[start_idx:end_idx]

        print('...In total {} inputs.'.format(Y.shape[0]))
        idx = 0
        for x, y in zip(X_batch, Y_batch):
            print('{}-th input...'.format(idx))
            x = np.expand_dims(x, axis=0)
            strategy = ATTACK_STRATEGY.RANDOM.value
            # generate_single(sess, x, y, attacker=ATTACK.FGSM, candidates=None,
            #                 attack_count=None, max_perturb=get_perturb_upperbound(),
            #                 strategy=ATTACK_STRATEGY.RANDOM.value)
            start_sample = time.monotonic()
            X_adv.append(generate_single(sess, x, y, attacker, candidates, attack_count,
                                         strategy=strategy))
            end_sample = time.monotonic()
            print('({}, {})-th sample: {}\n\n'.format(i, idx, (end_sample - start_sample)))
            idx += 1

        save_adv_examples(np.asarray(X_adv), prefix=prefix, bs_samples=X_batch,
                          dataset=dataset, transformation=strategy, attack_method=attacker,
                          attack_params='eps100_batchsize{}_{}'.format(batch_size, i))

    duration = time.monotonic() - start

    print('----------------------------------')
    print('           Summary')
    print('----------------------------------')
    print('Number of inputs:', Y.shape[0])
    print('Adversary:', attacker)
    print('Strategy:', strategy)
    print('Time cost:', duration)

    sess.close()
    with open(filename, 'rb') as file:
        model = pickle.load(file)
    return model
"""

if __name__ == '__main__':
    transformations = TRANSFORMATION.supported_types()

    data = {
        'dataset': DATA.mnist,
        'architecture': 'svm',
    }

    (X_train, Y_train), (X_test, Y_test) = load_data(data['dataset'])
    Y_train = np.argmax(Y_train, axis=1)
    Y_test = np.argmax(Y_test, axis=1)

    for trans in transformations:
        data['trans'] = trans
        data['train'] = (transform(X_train, trans), Y_train)
        data['test'] = (transform(X_test, trans), Y_test)

        model = train(data, training_params=default_train_params)
        filename = 'model-{}-{}-{}.pkl'.format(data['dataset'], data['architecture'], data['trans'])
import os

from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical

from data.data import load_data

# Parse the arguments
save_weights = "pretrained/cnn_weights.h5"
model_name = 'pretrained/model.h5'
Saved_Weights_Path = None

if not os.path.exists('pretrained'):
    os.makedirs('pretrained')

# Read/Download the MNIST-style dataset
print('Loading Dataset...')
X_train, Y_train, X_test, Y_test = load_data()

# Divide the data into training and testing sets.
train_img, train_labels, test_img, test_labels = X_train, Y_train, X_test, Y_test

# Each image is a 28x28 matrix.
img_rows, img_columns = 28, 28

# Transform the labels into one-hot vectors over all classes:
# 13 classes in total -- the digits 0 to 9 plus the operators +, - and *.
total_classes = 13

# Encode each label as an integer class index, then one-hot encode it.
encoder = LabelEncoder()
tra = encoder.fit_transform(Y_train)
train_labels = to_categorical(tra)