Example #1
def train_and_eval_models(train_x, test_x, train_y, test_y):
    train_x, dev_x, train_y, dev_y = train_test_split(train_x,
                                                      train_y,
                                                      test_size=0.1,
                                                      random_state=42,
                                                      stratify=train_y)
    train_x, scaler = data_utils.normalize(train_x)
    dev_x, _ = data_utils.normalize(dev_x, scaler=scaler)
    test_x, _ = data_utils.normalize(test_x, scaler=scaler)
    dump(scaler, 'scaler_{}.joblib'.format(MODALITY))
    cprint('Normalized data (z-score standardization)', 'yellow')
    print('Dataset shapes: ', train_x.shape, dev_x.shape, test_x.shape)
    print('Class distributions for train/dev/test:')
    print(np.unique(train_y, return_counts=True))
    print(np.unique(dev_y, return_counts=True))
    print(np.unique(test_y, return_counts=True))

    # code for SVM training
    #    svm = SVC(C=C, kernel=KERNEL, degree=DEGREE, gamma='auto')
    #    svm.fit(train_x, train_y)
    #    dump(svm, 'svm_{}_{}-kernel_C{}.joblib'.format(MODALITY, KERNEL, C))
    # code for MLP training
    mlp = MLPClassifier(hidden_layer_sizes=HIDDEN_LAYERS,
                        alpha=ALPHA,
                        early_stopping=True,
                        max_iter=200)
    mlp.fit(train_x, train_y)
    print(f'mlp.n_iter_: {mlp.n_iter_}')
    dump(mlp, 'mlp_{}.joblib'.format(MODALITY))
    # Decision Tree
    #    dt = tree.DecisionTreeClassifier(max_depth=None)
    #    dt.fit(train_x, train_y)
    #    print('DT information\nDepth: {}\nNumber_leaves: {}'.format(
    #        dt.get_depth(),
    #        dt.get_n_leaves()
    #    ))
    #    dump(dt, 'dt_{}.joblib'.format(MODALITY))
    # print('DT feature importances:\n{}'.format(dt.feature_importances_))
    # print(dt.feature_importances_.shape, np.min(dt.feature_importances_),
    #       np.max(dt.feature_importances_), sum(dt.feature_importances_))

    # predictions
    #    cprint('\nSVM PERFORMANCE', 'green')
    #    cprint(f'SVM parameters:\n {svm}\n', 'yellow')
    #    svm_uars = score(svm, train_x, train_y, dev_x, dev_y, test_x, test_y)
    cprint('\nMLP PERFORMANCE', 'green')
    cprint(f'MLP parameters:\n {mlp}\n', 'yellow')
    mlp_uars = score(mlp, train_x, train_y, dev_x, dev_y, test_x, test_y)
    #    cprint('\nDT PERFORMANCE', 'green')
    #    cprint(f'DT parameters:\n {dt}\n', 'yellow')
    #    dt_uars = score(dt, train_x, train_y, dev_x, dev_y, test_x, test_y)
    #    return svm_uars, mlp_uars, dt_uars
    return mlp_uars
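A minimal sketch of the normalize() variant used in this example, assuming it wraps sklearn's StandardScaler (the log message above calls it z-score standardization) and returns the transformed data together with the fitted scaler; the actual data_utils implementation is not shown in this listing:

from sklearn.preprocessing import StandardScaler

def normalize(x, scaler=None):
    # Fit the scaler on the training split only; reuse it for dev/test.
    if scaler is None:
        scaler = StandardScaler().fit(x)
    return scaler.transform(x), scaler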
Example #2
def combined_fill_between(axes, base_lines, x, ymin, ymax, *args, **kwargs):
    axes[0].fill_between(x, ymin, ymax, *args, **kwargs)
    base_lines = handle_base_line_exceptions(base_lines, axes)
    for ax, base_line in zip(axes[1:], base_lines):
        if base_line is not None:
            comb = data_utils.normalize(base_line, x, ymin, yerr=ymax)
            ax.fill_between(comb[0], comb[1], comb[2], *args, **kwargs)
Example #3
def combined_plot(axes, base_lines, x, y, *args, **kwargs):
    axes[0].plot(x, y, *args, **kwargs)
    base_lines = handle_base_line_exceptions(base_lines, axes)
    for ax, base_line in zip(axes[1:], base_lines):
        if base_line is not None:
            comb = data_utils.normalize(base_line, x, y)
            ax.plot(comb[0], comb[1], *args, **kwargs)
Example #4
def learn(filepath, save_figures=False):
    X, Y = data_utils.get_full_set(filepath)
    X = data_utils.normalize(X)
    Y = data_utils.aggregate_wine_labels(Y)
    score_fn = accuracy_score

    learning.pet_learning_rate(X, Y, score_fn, save=save_figures)

    plt.show()
Example #5
def adv_attack_template_S(img_tensor, GAN, target_sz=(127, 127)):
    """Adversarial attack on the template.

    Input: pytorch tensor (0, 255) ---> output: pytorch tensor (0, 255).
    """
    # step 1: normalization
    img_tensor = normalize(img_tensor)
    # step 2: pass to G
    with torch.no_grad():
        img_adv = GAN.transform(img_tensor, target_sz)
        return img_adv
Example #6
def combined_errorbar(axes, base_lines, x, y, yerr=None, **kwargs):
    axes[0].errorbar(x, y, yerr=yerr, **kwargs)
    base_lines = handle_base_line_exceptions(base_lines, axes)
    for ax, base_line in zip(axes[1:], base_lines):
        comb = data_utils.normalize(base_line, x, y, yerr=yerr)
        if len(comb[0]) != len(comb[1]):
            raise Exception(
                "normalize gave incoherent data: "
                f"len(x) != len(y): {len(comb[0])} != {len(comb[1])}")
        ax.errorbar(comb[0], comb[1], yerr=comb[2], **kwargs)
Example #7
def adv_attack_template(img_tensor, GAN):
    """Adversarial attack on the template.

    Input: pytorch tensor (0, 255) ---> output: pytorch tensor (0, 255).
    """
    # step 1: normalization
    img_tensor = normalize(img_tensor)
    # step 2: pass to G
    with torch.no_grad():
        GAN.template_clean1 = img_tensor
        GAN.forward()
    img_adv = GAN.template_adv255
    return img_adv
Example #8
    def forward(self, x):
        block1 = self.block1(normalize(x))
        block2 = self.block2(block1)
        block3 = self.block3(block2)
        block4 = self.block4(block3)
        block5 = self.block5(block4)
        block6 = self.block6(block5)
        block7 = self.block7(block6)
        block8 = self.block8(block1 + block7)

        return (torch.tanh(block8) + 1) / 2
Example #9
def main(
    model_dir: str,
    vc_src: str,
    vc_tgt: str,
    adv_tgt: str,
    output: str,
    eps: float,
    n_iters: int,
    attack_type: str,
):
    assert attack_type == "emb" or vc_src is not None
    model, config, attr, device = load_model(model_dir)

    vc_tgt = file2mel(vc_tgt, **config["preprocess"])
    adv_tgt = file2mel(adv_tgt, **config["preprocess"])

    vc_tgt = normalize(vc_tgt, attr)
    adv_tgt = normalize(adv_tgt, attr)

    vc_tgt = torch.from_numpy(vc_tgt).T.unsqueeze(0).to(device)
    adv_tgt = torch.from_numpy(adv_tgt).T.unsqueeze(0).to(device)

    if attack_type != "emb":
        vc_src = file2mel(vc_src, **config["preprocess"])
        vc_src = normalize(vc_src, attr)
        vc_src = torch.from_numpy(vc_src).T.unsqueeze(0).to(device)

    if attack_type == "e2e":
        adv_inp = e2e_attack(model, vc_src, vc_tgt, adv_tgt, eps, n_iters)
    elif attack_type == "emb":
        adv_inp = emb_attack(model, vc_tgt, adv_tgt, eps, n_iters)
    elif attack_type == "fb":
        adv_inp = fb_attack(model, vc_src, vc_tgt, adv_tgt, eps, n_iters)
    else:
        raise NotImplementedError()

    adv_inp = adv_inp.squeeze(0).T
    adv_inp = denormalize(adv_inp.data.cpu().numpy(), attr)
    adv_inp = mel2wav(adv_inp, **config["preprocess"])

    sf.write(output, adv_inp, config["preprocess"]["sample_rate"])
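A plausible sketch of the normalize()/denormalize() pair used above, assuming attr holds precomputed per-bin mel statistics; the keys "mean" and "std" are assumptions, not taken from the source:

import numpy as np

def normalize(mel: np.ndarray, attr: dict) -> np.ndarray:
    # Standardize each mel bin with the stored statistics.
    return (mel - attr["mean"]) / attr["std"]

def denormalize(mel: np.ndarray, attr: dict) -> np.ndarray:
    # Invert the standardization before vocoding.
    return mel * attr["std"] + attr["mean"]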
Example #10
    def set_input(self, input):
        """Unpack input data from the dataloader and perform necessary pre-processing steps.

        Parameters:
            input (dict): include the data itself and its metadata information.

        The option 'direction' can be used to swap images in domain A and domain B.
        """
        self.template_clean255 = input[0].squeeze(0).cuda() # pytorch tensor, shape=(1,3,127,127) [0,255]
        self.template_clean1 = normalize(self.template_clean255)
        # print('clean image shape:',self.init_frame_clean.size())
        self.X_crops = input[1].squeeze(0).cuda() # pytorch tensor, shape=(N,3,255,255)
Example #11
def adv_attack_search_new(img_tensor, GAN, search_sz=(255, 255)):
    """Adversarial attack on the search region.

    Input: pytorch tensor (0, 255) ---> output: pytorch tensor (0, 255).
    """
    # step 1: normalization
    img_tensor = normalize(img_tensor)
    # step 2: pass to G
    with torch.no_grad():
        GAN.tensor_clean1 = img_tensor
        GAN.num_search = img_tensor.size(0)
        GAN.forward(search_sz)
    img_adv = GAN.tensor_adv255
    return img_adv
Example #12
    def test_one(self):
        test_array = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
        normalized = normalize(test_array)
        assert normalized[0] == 0.0 and normalized[10] == 1.0
Example #13
def parse_method(args, dataset, n, c, d, device):
    if args.method == 'link':
        model = LINK(n, c).to(device)
    elif args.method == 'gcn':
        if args.dataset == 'ogbn-proteins':
            # Pre-compute GCN normalization.
            dataset.graph['edge_index'] = normalize(
                dataset.graph['edge_index'])
            model = GCN(in_channels=d,
                        hidden_channels=args.hidden_channels,
                        out_channels=c,
                        dropout=args.dropout,
                        save_mem=True,
                        use_bn=not args.no_bn).to(device)
        else:
            model = GCN(in_channels=d,
                        hidden_channels=args.hidden_channels,
                        out_channels=c,
                        num_layers=args.num_layers,
                        dropout=args.dropout,
                        use_bn=not args.no_bn).to(device)

    elif args.method == 'mlp' or args.method == 'cs':
        model = MLP(in_channels=d,
                    hidden_channels=args.hidden_channels,
                    out_channels=c,
                    num_layers=args.num_layers,
                    dropout=args.dropout).to(device)
    elif args.method == 'sgc':
        if args.cached:
            model = SGC(in_channels=d, out_channels=c,
                        hops=args.hops).to(device)
        else:
            model = SGCMem(in_channels=d, out_channels=c,
                           hops=args.hops).to(device)
    elif args.method == 'gprgnn':
        model = GPRGNN(d, args.hidden_channels, c,
                       alpha=args.gpr_alpha).to(device)
    elif args.method == 'appnp':
        model = APPNP_Net(d, args.hidden_channels, c,
                          alpha=args.gpr_alpha).to(device)
    elif args.method == 'gat':
        model = GAT(d,
                    args.hidden_channels,
                    c,
                    num_layers=args.num_layers,
                    dropout=args.dropout,
                    heads=args.gat_heads).to(device)
    elif args.method == 'lp':
        mult_bin = args.dataset == 'ogbn-proteins'
        model = MultiLP(c, args.lp_alpha, args.hops, mult_bin=mult_bin)
    elif args.method == 'mixhop':
        model = MixHop(d,
                       args.hidden_channels,
                       c,
                       num_layers=args.num_layers,
                       dropout=args.dropout,
                       hops=args.hops).to(device)
    elif args.method == 'gcnjk':
        model = GCNJK(d,
                      args.hidden_channels,
                      c,
                      num_layers=args.num_layers,
                      dropout=args.dropout,
                      jk_type=args.jk_type).to(device)
    elif args.method == 'gatjk':
        model = GATJK(d,
                      args.hidden_channels,
                      c,
                      num_layers=args.num_layers,
                      dropout=args.dropout,
                      heads=args.gat_heads,
                      jk_type=args.jk_type).to(device)
    elif args.method == 'h2gcn':
        model = H2GCN(d,
                      args.hidden_channels,
                      c,
                      dataset.graph['edge_index'],
                      dataset.graph['num_nodes'],
                      num_layers=args.num_layers,
                      dropout=args.dropout,
                      num_mlp_layers=args.num_mlp_layers).to(device)
    else:
        raise ValueError('Invalid method')
    return model
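The pre-computed GCN normalization above is not defined in this listing. A self-contained sketch of the usual symmetric normalization D^{-1/2}(A + I)D^{-1/2}, assuming edge_index is a dense (2, E) LongTensor; the original presumably returns a sparse adjacency, whereas this sketch returns an (edge_index, edge_weight) pair:

import torch

def normalize(edge_index, num_nodes=None):
    # Add self-loops, then weight each edge by deg(i)^-0.5 * deg(j)^-0.5.
    if num_nodes is None:
        num_nodes = int(edge_index.max()) + 1
    loops = torch.arange(num_nodes, device=edge_index.device)
    edge_index = torch.cat([edge_index, torch.stack([loops, loops])], dim=1)
    row, col = edge_index
    deg = torch.zeros(num_nodes, device=edge_index.device)
    deg.scatter_add_(0, row, torch.ones_like(row, dtype=deg.dtype))
    deg_inv_sqrt = deg.pow(-0.5)
    deg_inv_sqrt[deg_inv_sqrt == float("inf")] = 0.0
    edge_weight = deg_inv_sqrt[row] * deg_inv_sqrt[col]
    return edge_index, edge_weight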
Example #14
    def forward(self, x):
        xn = normalize(x)
        x1 = self.block1(xn)
        x2 = self.block2(xn)
        x3 = x1 + x2
        return denormalize(x3)
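The tensor-valued normalize()/denormalize() pair used in Examples #5, #7, #8, #11, #14, and #15 is also not defined here. Judging by attribute names such as template_clean255 vs. template_clean1, a minimal sketch could be plain 255-scaling (an assumption; the real functions may additionally permute channels or subtract means):

import torch

def normalize(t: torch.Tensor) -> torch.Tensor:
    # Map a (0, 255) image tensor into (0, 1) for the generator.
    return t / 255.0

def denormalize(t: torch.Tensor) -> torch.Tensor:
    # Map the generator output back to (0, 255).
    return t * 255.0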
Example #15
    def forward(self, x):
        x1 = self.block1(normalize(x))
        re = self.residual(x1)
        x2 = self.block2(re)
        x3 = self.block3(x1 + x2)
        return denormalize(x3)
Example #16
    def test_three(self):
        binned = normalize(bin_spectra(self.data, 10))
        assert binned[280] == 0.3
Example #17
            shuffle=True)
        import sys

        # get a record
        next_text, next_label = next(iter(amz_train))

        try:
            print("Next record shape: {}".format(next_text.shape))
        except AttributeError:
            print("(No shape) Text: '{}'".format(next_text))


        # batch training, testing sets
        amz_train_batch = batch_data(amz_train,
            normalizer_fun=lambda x: data_utils.normalize(x, 
                max_length=300, 
                truncate_left=True,
                encoding=None),
            transformer_fun=None)
        amz_test_batch = batch_data(amz_test,
            normalizer_fun=None, transformer_fun=None)

        # Spit out some sample data
        next_batch = next(amz_train_batch)
        data, label = next_batch
        # np.nan is rejected by modern NumPy; sys.maxsize disables truncation
        np.set_printoptions(threshold=sys.maxsize)
        print("Batch properties:")
        print("Shape (data): {}".format(data.shape))
        print("Shape (label): {}".format(label.shape))
        print("Type: {}".format(type(data)))
        print()
        print("First record of first batch:")
Example #18
    def test_two(self):
        binned = normalize(bin_spectra(self.data, 10))
        assert binned[1] == 1.0 and binned[10] == 0.5
Example #19
    def test_one(self):
        binned = normalize(bin_spectra(self.data, 1))
        assert binned[1] == 0.5 and binned[500] == 1.0
Example #20
    def test_two(self):
        test_array = np.array([0, 0, 0])
        normalized = normalize(test_array)
        assert np.all(normalized == test_array)
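The unit tests in Examples #12 and #20 pin this normalize() down as min-max scaling with a guard for constant input; a sketch consistent with both tests:

import numpy as np

def normalize(arr: np.ndarray) -> np.ndarray:
    # Min-max scaling to [0, 1]; constant arrays come back unchanged
    # (see test_two in Example #20).
    span = arr.max() - arr.min()
    if span == 0:
        return arr
    return (arr - arr.min()) / span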
Example #21
def tune(filepath, save_figures=False):
    ### Getting and processing the dataset
    test_X, test_Y, train_X, train_Y = data_utils.get_separate_sets(filepath)

    # Normalize continuous features, and aggregate categories of pets
    train_X['AgeuponOutcome'] = data_utils.normalize(train_X['AgeuponOutcome'])
    train_Y = aggregate_pet_labels(train_Y)
    test_X['AgeuponOutcome'] = data_utils.normalize(test_X['AgeuponOutcome'])
    test_Y = aggregate_pet_labels(test_Y)

    ### Running the tuning
    score_fn = accuracy_score
    score_fn_name = 'accuracy'

    print("Running KNN")
    if save_figures:
        save_path = '../graphs/pet/tuning_knn.png'
    else:
        save_path = None
    best_param, tuning_scores, model = hyperparameter_tuning(
        'knn', score_fn_name, train_X, train_Y, save_path=save_path)
    test_and_print(model, score_fn, train_X, train_Y, test_X, test_Y,
                   'K-Nearest Neighbour', best_param, tuning_scores)

    print("Running SVM (linear)")
    if save_figures:
        save_path = '../graphs/pet/tuning_svm_linear.png'
    else:
        save_path = None
    best_param, tuning_scores, model = hyperparameter_tuning(
        'svm_linear', score_fn_name, train_X, train_Y, save_path=save_path)
    test_and_print(model, score_fn, train_X, train_Y, test_X, test_Y,
                   'Support Vector Machines (linear)', best_param,
                   tuning_scores)

    print("Running SVM (poly)")
    if save_figures:
        save_path = '../graphs/pet/tuning_svm_poly.png'
    else:
        save_path = None
    best_param, tuning_scores, model = hyperparameter_tuning(
        'svm_poly', score_fn_name, train_X, train_Y, save_path=save_path)
    test_and_print(model, score_fn, train_X, train_Y, test_X, test_Y,
                   'Support Vector Machines (poly)', best_param, tuning_scores)

    print("Running decision trees")
    if save_figures:
        save_path = '../graphs/pet/tuning_dt.png'
    else:
        save_path = None
    best_param, tuning_scores, model = hyperparameter_tuning(
        'dec_tree', score_fn_name, train_X, train_Y, save_path=save_path)
    test_and_print(model, score_fn, train_X, train_Y, test_X, test_Y,
                   'Decision Trees', best_param, tuning_scores)

    print("Running boosting")
    if save_figures:
        save_path = '../graphs/pet/tuning_boosting.png'
    else:
        save_path = None
    best_param, tuning_scores, model = hyperparameter_tuning(
        'boosting', score_fn_name, train_X, train_Y, save_path=save_path)
    test_and_print(model, score_fn, train_X, train_Y, test_X, test_Y,
                   'Adaboost', best_param, tuning_scores)

    print("Running neural networks")
    if save_figures:
        save_path = '../graphs/pet/tuning_neuraln.png'
    else:
        save_path = None
    best_param, tuning_scores, model = hyperparameter_tuning(
        'neural_n', score_fn_name, train_X, train_Y, save_path=save_path)
    test_and_print(model, score_fn, train_X, train_Y, test_X, test_Y,
                   'Neural Nets', best_param, tuning_scores)

    plt.show()
Example #22
    args = parser.parse_args()

    # get training and testing sets, and their sizes for amazon.
    # this HDF5 file uses an 80/20 train/test split and lives at /data/pcallier/amazon
    (amtr, amte), (amntr, amnte) = datasets, sizes = batch_data.split_data(
        None, 
        h5_path=args.h5_path, 
        overwrite_previous=False,
        in_memory=False,
        shuffle=True)
    import sys

    # batch training, testing sets
    am_train_batch = batch_data.batch_data(amtr,
        normalizer_fun=lambda x: data_utils.normalize(x[0], 
            max_length=300, 
            truncate_left=True),
        transformer_fun=None)
    am_test_batch = batch_data.batch_data(amte,
        normalizer_fun=None, transformer_fun=None)
    
    # Spit out some sample data
    next_batch = next(am_train_batch)
    data, label = next_batch
    # np.nan is rejected by modern NumPy; sys.maxsize disables truncation
    np.set_printoptions(threshold=sys.maxsize)
    print("Batch properties:")
    print("Length: {}".format(len(data)))
    print("Type: {}".format(type(data)))
    print()
    print("First record of first batch:")
    print("Type (1 level in): {}".format(type(data[0])))