def train_and_eval_models(train_x, test_x, train_y, test_y):
    train_x, dev_x, train_y, dev_y = train_test_split(
        train_x, train_y, test_size=0.1, random_state=42, stratify=train_y)
    train_x, scaler = data_utils.normalize(train_x)
    dev_x, _ = data_utils.normalize(dev_x, scaler=scaler)
    test_x, _ = data_utils.normalize(test_x, scaler=scaler)
    dump(scaler, 'scaler_{}.joblib'.format(MODALITY))
    cprint('Normalized data (z-score standardization)', 'yellow')
    print('Dataset shapes: ', train_x.shape, dev_x.shape, test_x.shape)
    print('Class distributions for train/dev/test:')
    print(np.unique(train_y, return_counts=True))
    print(np.unique(dev_y, return_counts=True))
    print(np.unique(test_y, return_counts=True))

    # code for SVM training
    # svm = SVC(C=C, kernel=KERNEL, degree=DEGREE, gamma='auto')
    # svm.fit(train_x, train_y)
    # dump(svm, 'svm_{}_{}-kernel_C{}.joblib'.format(MODALITY, KERNEL, C))

    # code for MLP training
    mlp = MLPClassifier(hidden_layer_sizes=HIDDEN_LAYERS, alpha=ALPHA,
                        early_stopping=True, max_iter=200)
    mlp.fit(train_x, train_y)
    print(f'mlp.n_iter_: {mlp.n_iter_}')
    dump(mlp, 'mlp_{}.joblib'.format(MODALITY))

    # Decision Tree
    # dt = tree.DecisionTreeClassifier(max_depth=None)
    # dt.fit(train_x, train_y)
    # print('DT information\nDepth: {}\nNumber_leaves: {}'.format(
    #     dt.get_depth(),
    #     dt.get_n_leaves()
    # ))
    # dump(dt, 'dt_{}.joblib'.format(MODALITY))
    # print('DT feature importances:\n{}'.format(dt.feature_importances_))
    # print(dt.feature_importances_.shape, np.min(dt.feature_importances_),
    #       np.max(dt.feature_importances_), sum(dt.feature_importances_))

    # predictions
    # cprint('\nSVM PERFORMANCE', 'green')
    # cprint(f'SVM parameters:\n {svm}\n', 'yellow')
    # svm_uars = score(svm, train_x, train_y, dev_x, dev_y, test_x, test_y)

    cprint('\nMLP PERFORMANCE', 'green')
    cprint(f'MLP parameters:\n {mlp}\n', 'yellow')
    mlp_uars = score(mlp, train_x, train_y, dev_x, dev_y, test_x, test_y)

    # cprint('\nDT PERFORMANCE', 'green')
    # cprint(f'DT parameters:\n {dt}\n', 'yellow')
    # dt_uars = score(dt, train_x, train_y, dev_x, dev_y, test_x, test_y)

    # return svm_uars, mlp_uars, dt_uars
    return mlp_uars
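# The score() helper used above is not shown here. A minimal sketch of what it
# might look like, assuming it reports unweighted average recall (UAR, i.e.
# balanced accuracy) for the train/dev/test splits; the per-split printing and
# the tuple return format are assumptions, not the original implementation.
from sklearn.metrics import recall_score

def score(model, train_x, train_y, dev_x, dev_y, test_x, test_y):
    uars = []
    for split_name, x, y in [('train', train_x, train_y),
                             ('dev', dev_x, dev_y),
                             ('test', test_x, test_y)]:
        # macro-averaged recall over classes == unweighted average recall (UAR)
        uar = recall_score(y, model.predict(x), average='macro')
        print(f'{split_name} UAR: {uar:.4f}')
        uars.append(uar)
    return tuple(uars)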
def combined_fill_between(axes, base_lines, x, ymin, ymax, *args, **kwargs):
    axes[0].fill_between(x, ymin, ymax, *args, **kwargs)
    base_lines = handle_base_line_exceptions(base_lines, axes)
    for ax, base_line in zip(axes[1:], base_lines):
        if base_line is not None:
            comb = data_utils.normalize(base_line, x, ymin, yerr=ymax)
            ax.fill_between(comb[0], comb[1], comb[2], *args, **kwargs)
def combined_plot(axes, base_lines, x, y, *args, **kwargs):
    axes[0].plot(x, y, *args, **kwargs)
    base_lines = handle_base_line_exceptions(base_lines, axes)
    for ax, base_line in zip(axes[1:], base_lines):
        if base_line is not None:
            comb = data_utils.normalize(base_line, x, y)
            ax.plot(comb[0], comb[1], *args, **kwargs)
def learn(filepath, save_figures=False):
    X, Y = data_utils.get_full_set(filepath)
    X = data_utils.normalize(X)
    Y = data_utils.aggregate_wine_labels(Y)
    score_fn = accuracy_score
    learning.pet_learning_rate(X, Y, score_fn, save=save_figures)
    plt.show()
def adv_attack_template_S(img_tensor, GAN, target_sz=(127, 127)):
    """Adversarial attack on the template.

    Input: pytorch tensor (0, 255) ---> output: pytorch tensor (0, 255).
    """
    # step 1: normalization
    img_tensor = normalize(img_tensor)
    # step 2: pass to G
    with torch.no_grad():
        img_adv = GAN.transform(img_tensor, target_sz)
    return img_adv
def combined_errorbar(axes, base_lines, x, y, yerr=None, **kwargs):
    axes[0].errorbar(x, y, yerr=yerr, **kwargs)
    base_lines = handle_base_line_exceptions(base_lines, axes)
    for ax, base_line in zip(axes[1:], base_lines):
        comb = data_utils.normalize(base_line, x, y, yerr=yerr)
        if len(comb[0]) != len(comb[1]):
            raise Exception(
                "normalize gave incoherent data: len(x) != len(y): "
                "{} != {}".format(len(comb[0]), len(comb[1])))
        ax.errorbar(comb[0], comb[1], yerr=comb[2], **kwargs)
def adv_attack_template(img_tensor, GAN):
    """Adversarial attack on the template.

    Input: pytorch tensor (0, 255) ---> output: pytorch tensor (0, 255).
    """
    # step 1: normalization
    img_tensor = normalize(img_tensor)
    # step 2: pass to G
    with torch.no_grad():
        GAN.template_clean1 = img_tensor
        GAN.forward()
        img_adv = GAN.template_adv255
    return img_adv
def forward(self, x):
    block1 = self.block1(normalize(x))
    block2 = self.block2(block1)
    block3 = self.block3(block2)
    block4 = self.block4(block3)
    block5 = self.block5(block4)
    block6 = self.block6(block5)
    block7 = self.block7(block6)
    block8 = self.block8(block1 + block7)
    # map the tanh output from [-1, 1] back to [0, 1]
    return (torch.tanh(block8) + 1) / 2
def main(
    model_dir: str,
    vc_src: str,
    vc_tgt: str,
    adv_tgt: str,
    output: str,
    eps: float,
    n_iters: int,
    attack_type: str,
):
    assert attack_type == "emb" or vc_src is not None

    model, config, attr, device = load_model(model_dir)
    vc_tgt = file2mel(vc_tgt, **config["preprocess"])
    adv_tgt = file2mel(adv_tgt, **config["preprocess"])
    vc_tgt = normalize(vc_tgt, attr)
    adv_tgt = normalize(adv_tgt, attr)
    vc_tgt = torch.from_numpy(vc_tgt).T.unsqueeze(0).to(device)
    adv_tgt = torch.from_numpy(adv_tgt).T.unsqueeze(0).to(device)

    if attack_type != "emb":
        vc_src = file2mel(vc_src, **config["preprocess"])
        vc_src = normalize(vc_src, attr)
        vc_src = torch.from_numpy(vc_src).T.unsqueeze(0).to(device)

    if attack_type == "e2e":
        adv_inp = e2e_attack(model, vc_src, vc_tgt, adv_tgt, eps, n_iters)
    elif attack_type == "emb":
        adv_inp = emb_attack(model, vc_tgt, adv_tgt, eps, n_iters)
    elif attack_type == "fb":
        adv_inp = fb_attack(model, vc_src, vc_tgt, adv_tgt, eps, n_iters)
    else:
        raise NotImplementedError()

    adv_inp = adv_inp.squeeze(0).T
    adv_inp = denormalize(adv_inp.data.cpu().numpy(), attr)
    adv_inp = mel2wav(adv_inp, **config["preprocess"])
    sf.write(output, adv_inp, config["preprocess"]["sample_rate"])
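# e2e_attack / emb_attack / fb_attack are defined elsewhere in this codebase. As a
# rough illustration only: a minimal sketch of an embedding-space attack, assuming
# the model exposes a speaker encoder and the perturbation is bounded to roughly
# +/- eps via a tanh reparameterization; the attribute name `speaker_encoder`, the
# optimizer, and the loss are all assumptions, not the original implementation.
import torch
import torch.nn.functional as F

def emb_attack_sketch(model, vc_tgt, adv_tgt, eps, n_iters):
    ptb = torch.zeros_like(vc_tgt).normal_(0, 1).requires_grad_(True)
    opt = torch.optim.Adam([ptb], lr=1e-3)
    with torch.no_grad():
        tgt_emb = model.speaker_encoder(adv_tgt)  # embedding to move toward
    for _ in range(n_iters):
        adv_inp = vc_tgt + eps * ptb.tanh()  # perturbation kept within +/- eps
        adv_emb = model.speaker_encoder(adv_inp)
        loss = F.mse_loss(adv_emb, tgt_emb)
        opt.zero_grad()
        loss.backward()
        opt.step()
    return (vc_tgt + eps * ptb.tanh()).detach()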
def set_input(self, input):
    """Unpack input data from the dataloader and perform necessary pre-processing steps.

    Parameters:
        input (dict): include the data itself and its metadata information.

    The option 'direction' can be used to swap images in domain A and domain B.
    """
    # pytorch tensor, shape=(1,3,127,127), range [0,255]
    self.template_clean255 = input[0].squeeze(0).cuda()
    self.template_clean1 = normalize(self.template_clean255)
    # print('clean image shape:', self.init_frame_clean.size())
    # pytorch tensor, shape=(N,3,255,255)
    self.X_crops = input[1].squeeze(0).cuda()
def adv_attack_search_new(img_tensor, GAN, search_sz=(255, 255)):
    """Adversarial attack on the search region.

    Input: pytorch tensor (0, 255) ---> output: pytorch tensor (0, 255).
    """
    # step 1: normalization
    img_tensor = normalize(img_tensor)
    # step 2: pass to G
    with torch.no_grad():
        GAN.tensor_clean1 = img_tensor
        GAN.num_search = img_tensor.size(0)
        GAN.forward(search_sz)
        img_adv = GAN.tensor_adv255
    return img_adv
def test_one(self):
    test_array = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
    normalized = normalize(test_array)
    assert normalized[0] == 0.0 and normalized[10] == 1.0
def parse_method(args, dataset, n, c, d, device):
    if args.method == 'link':
        model = LINK(n, c).to(device)
    elif args.method == 'gcn':
        if args.dataset == 'ogbn-proteins':
            # Pre-compute GCN normalization.
            dataset.graph['edge_index'] = normalize(
                dataset.graph['edge_index'])
            model = GCN(in_channels=d,
                        hidden_channels=args.hidden_channels,
                        out_channels=c,
                        dropout=args.dropout,
                        save_mem=True,
                        use_bn=not args.no_bn).to(device)
        else:
            model = GCN(in_channels=d,
                        hidden_channels=args.hidden_channels,
                        out_channels=c,
                        num_layers=args.num_layers,
                        dropout=args.dropout,
                        use_bn=not args.no_bn).to(device)
    elif args.method == 'mlp' or args.method == 'cs':
        model = MLP(in_channels=d, hidden_channels=args.hidden_channels,
                    out_channels=c, num_layers=args.num_layers,
                    dropout=args.dropout).to(device)
    elif args.method == 'sgc':
        if args.cached:
            model = SGC(in_channels=d, out_channels=c,
                        hops=args.hops).to(device)
        else:
            model = SGCMem(in_channels=d, out_channels=c,
                           hops=args.hops).to(device)
    elif args.method == 'gprgnn':
        model = GPRGNN(d, args.hidden_channels, c,
                       alpha=args.gpr_alpha).to(device)
    elif args.method == 'appnp':
        model = APPNP_Net(d, args.hidden_channels, c,
                          alpha=args.gpr_alpha).to(device)
    elif args.method == 'gat':
        model = GAT(d, args.hidden_channels, c,
                    num_layers=args.num_layers,
                    dropout=args.dropout,
                    heads=args.gat_heads).to(device)
    elif args.method == 'lp':
        mult_bin = args.dataset == 'ogbn-proteins'
        model = MultiLP(c, args.lp_alpha, args.hops, mult_bin=mult_bin)
    elif args.method == 'mixhop':
        model = MixHop(d, args.hidden_channels, c,
                       num_layers=args.num_layers,
                       dropout=args.dropout,
                       hops=args.hops).to(device)
    elif args.method == 'gcnjk':
        model = GCNJK(d, args.hidden_channels, c,
                      num_layers=args.num_layers,
                      dropout=args.dropout,
                      jk_type=args.jk_type).to(device)
    elif args.method == 'gatjk':
        model = GATJK(d, args.hidden_channels, c,
                      num_layers=args.num_layers,
                      dropout=args.dropout,
                      heads=args.gat_heads,
                      jk_type=args.jk_type).to(device)
    elif args.method == 'h2gcn':
        model = H2GCN(d, args.hidden_channels, c,
                      dataset.graph['edge_index'],
                      dataset.graph['num_nodes'],
                      num_layers=args.num_layers,
                      dropout=args.dropout,
                      num_mlp_layers=args.num_mlp_layers).to(device)
    else:
        raise ValueError('Invalid method')
    return model
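# In the ogbn-proteins branch above, normalize() pre-computes the GCN propagation
# matrix once instead of recomputing it on every forward pass. A minimal sketch,
# assuming `edge_index` is a torch_sparse.SparseTensor adjacency and the usual
# symmetric normalization D^{-1/2} A D^{-1/2} is intended; the original helper may
# differ (for example, it might also add self-loops).
import torch

def normalize_sketch(adj_t):
    deg = adj_t.sum(dim=1).to(torch.float)
    deg_inv_sqrt = deg.pow(-0.5)
    deg_inv_sqrt[deg_inv_sqrt == float('inf')] = 0
    # scale rows and columns by D^{-1/2}
    return deg_inv_sqrt.view(-1, 1) * adj_t * deg_inv_sqrt.view(1, -1)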
def forward(self, x):
    xn = normalize(x)
    x1 = self.block1(xn)
    x2 = self.block2(xn)
    x3 = x1 + x2
    return denormalize(x3)
def forward(self, x):
    x1 = self.block1(normalize(x))
    re = self.residual(x1)
    x2 = self.block2(re)
    x3 = self.block3(x1 + x2)
    return denormalize(x3)
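# The normalize()/denormalize() helpers used by the forward passes above are
# defined elsewhere. A minimal sketch, assuming they shift an image tensor in
# [0, 1] to zero-mean inputs using fixed per-channel statistics and back; the
# actual statistics (here ImageNet-style values) are an assumption.
import torch

_MEAN = torch.tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1)
_STD = torch.tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1)

def normalize(x):
    return (x - _MEAN.to(x.device)) / _STD.to(x.device)

def denormalize(x):
    return x * _STD.to(x.device) + _MEAN.to(x.device)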
def test_three(self):
    binned = normalize(bin_spectra(self.data, 10))
    assert binned[280] == 0.3
                                    shuffle=True)
import sys

# get a record
next_text, next_label = next(iter(amz_train))
try:
    print("Next record shape: {}".format(next_text.shape))
except AttributeError as e:
    print("(No shape) Text: '{}'".format(next_text))

# batch training, testing sets
amz_train_batch = batch_data(
    amz_train,
    normalizer_fun=lambda x: data_utils.normalize(
        x, max_length=300, truncate_left=True, encoding=None),
    transformer_fun=None)
amz_test_batch = batch_data(amz_test, normalizer_fun=None, transformer_fun=None)

# Spit out some sample data
next_batch = next(amz_train_batch)
data, label = next_batch
np.set_printoptions(threshold=sys.maxsize)  # print full arrays
print("Batch properties:")
print("Shape (data): {}".format(data.shape))
print("Shape (label): {}".format(label.shape))
print("Type: {}".format(type(data)))
print()
print("First record of first batch:")
def test_two(self):
    binned = normalize(bin_spectra(self.data, 10))
    assert binned[1] == 1.0 and binned[10] == 0.5
def test_one(self):
    binned = normalize(bin_spectra(self.data, 1))
    assert binned[1] == 0.5 and binned[500] == 1.0
def test_two(self):
    test_array = np.array([0, 0, 0])
    normalized = normalize(test_array)
    assert np.all(normalized == test_array)
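# A minimal sketch of a normalize() consistent with the array tests above: min-max
# scaling to [0, 1], returning the input unchanged when it has zero range. This is
# an illustrative assumption, not necessarily the implementation under test.
import numpy as np

def normalize_sketch(arr):
    arr = np.asarray(arr, dtype=float)
    span = arr.max() - arr.min()
    if span == 0:
        return arr  # degenerate case: leave an all-constant array untouched
    return (arr - arr.min()) / span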
def tune(filepath, save_figures=False):
    # Getting and processing the dataset
    test_X, test_Y, train_X, train_Y = data_utils.get_separate_sets(filepath)

    # Normalize continuous features, and aggregate categories of pets
    train_X['AgeuponOutcome'] = data_utils.normalize(train_X['AgeuponOutcome'])
    train_Y = aggregate_pet_labels(train_Y)
    test_X['AgeuponOutcome'] = data_utils.normalize(test_X['AgeuponOutcome'])
    test_Y = aggregate_pet_labels(test_Y)

    # Running the tuning
    score_fn = accuracy_score
    score_fn_name = 'accuracy'

    print("Running KNN")
    save_path = '../graphs/pet/tuning_knn.png' if save_figures else None
    best_param, tuning_scores, model = hyperparameter_tuning(
        'knn', score_fn_name, train_X, train_Y, save_path=save_path)
    test_and_print(model, score_fn, train_X, train_Y, test_X, test_Y,
                   'K-Nearest Neighbour', best_param, tuning_scores)

    print("Running SVM (linear)")
    save_path = '../graphs/pet/tuning_svm_linear.png' if save_figures else None
    best_param, tuning_scores, model = hyperparameter_tuning(
        'svm_linear', score_fn_name, train_X, train_Y, save_path=save_path)
    test_and_print(model, score_fn, train_X, train_Y, test_X, test_Y,
                   'Support Vector Machines (linear)', best_param, tuning_scores)

    print("Running SVM (poly)")
    save_path = '../graphs/pet/tuning_svm_poly.png' if save_figures else None
    best_param, tuning_scores, model = hyperparameter_tuning(
        'svm_poly', score_fn_name, train_X, train_Y, save_path=save_path)
    test_and_print(model, score_fn, train_X, train_Y, test_X, test_Y,
                   'Support Vector Machines (poly)', best_param, tuning_scores)

    print("Running decision trees")
    save_path = '../graphs/pet/tuning_dt.png' if save_figures else None
    best_param, tuning_scores, model = hyperparameter_tuning(
        'dec_tree', score_fn_name, train_X, train_Y, save_path=save_path)
    test_and_print(model, score_fn, train_X, train_Y, test_X, test_Y,
                   'Decision Trees', best_param, tuning_scores)

    print("Running boosting")
    save_path = '../graphs/pet/tuning_boosting.png' if save_figures else None
    best_param, tuning_scores, model = hyperparameter_tuning(
        'boosting', score_fn_name, train_X, train_Y, save_path=save_path)
    test_and_print(model, score_fn, train_X, train_Y, test_X, test_Y,
                   'Adaboost', best_param, tuning_scores)

    print("Running neural networks")
    save_path = '../graphs/pet/tuning_neuraln.png' if save_figures else None
    best_param, tuning_scores, model = hyperparameter_tuning(
        'neural_n', score_fn_name, train_X, train_Y, save_path=save_path)
    test_and_print(model, score_fn, train_X, train_Y, test_X, test_Y,
                   'Neural Nets', best_param, tuning_scores)

    plt.show()
args = parser.parse_args()

# get training and testing sets, and their sizes, for amazon.
# this HDF5 file uses an 80/20 train/test split and lives at /data/pcallier/amazon
(amtr, amte), (amntr, amnte) = datasets, sizes = batch_data.split_data(
    None,
    h5_path=args.h5_path,
    overwrite_previous=False,
    in_memory=False,
    shuffle=True)
import sys

# batch training, testing sets
am_train_batch = batch_data.batch_data(
    amtr,
    normalizer_fun=lambda x: data_utils.normalize(
        x[0], max_length=300, truncate_left=True),
    transformer_fun=None)
am_test_batch = batch_data.batch_data(amte, normalizer_fun=None, transformer_fun=None)

# Spit out some sample data
next_batch = next(am_train_batch)
data, label = next_batch
np.set_printoptions(threshold=sys.maxsize)  # print full arrays
print("Batch properties:")
print("Length: {}".format(len(data)))
print("Type: {}".format(type(data)))
print()
print("First record of first batch:")
print("Type (1 level in): {}".format(type(data[0])))