def question10():
    perceptron = Perceptron()
    svm = SVM()
    lda = LDA()
    repeat = 500
    mean_accuracies = np.empty((3, len(all_m)))
    accuracies = np.empty((3, repeat))
    for i, m in enumerate(all_m):
        for j in range(repeat):
            X, yx = draw_points_until_two_classes(m)  # training set
            Z, yz = draw_points_until_two_classes(k)  # test set
            perceptron.fit(X, yx)
            svm.fit(X, yx)
            lda.fit(X, yx)
            accuracies[0, j] = perceptron.score(Z, yz)["accuracy"]
            accuracies[1, j] = svm.score(Z, yz)["accuracy"]
            accuracies[2, j] = lda.score(Z, yz)["accuracy"]
        mean_accuracies[:, i] = accuracies.mean(axis=1)
    models = ["Perceptron", "SVM", "LDA"]
    colors = ['blue', 'red', 'green']
    fig = plt.figure()
    for i, model in enumerate(models):
        plt.plot(all_m, mean_accuracies[i, :], color=colors[i], label=model)
    plt.legend()
    plt.title("Q10: mean accuracy as function of m")
    fig.savefig("q10.png", bbox_inches='tight', pad_inches=0.2, dpi=fig.dpi)
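
# draw_points_until_two_classes is used above (in its one-argument form) but
# not defined in this file. A minimal sketch of what it plausibly does,
# assuming points ~ N(0, I_2) with true labels sign(<w, x> + b); the
# hyperplane values mirror the ones hard-coded in get_true_y further below
# and are an assumption:
def draw_points_until_two_classes(m):
    w_true, b_true = np.array([0.3, -0.5]), 0.1  # assumed true hyperplane
    while True:
        X = np.random.multivariate_normal(np.zeros(2), np.eye(2), m)
        y = np.sign(X @ w_true + b_true)
        if (y == 1).any() and (y == -1).any():
            return X, y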
def character_classification():
    print('Loading data...')
    x, y = load_data_chars()
    print('Processing data...')
    print('Data shape: ', x.shape)
    print('Labels shape: ', y.shape)
    plots.plot_filters(x[0])
    SVM.svm(x, y)
    Naive_Bayes.naive_bayes(x, y)
    KNN.knn(x, y)
    CNN.fit_cnn(x, y, trials=1, network_type='simple')
def train(args):
    """
    This function trains the models
    :param args: the command line arguments defining the desired actions
    """
    # load data
    train_data_all, dev_data_all, _ = load(
        args.data_dir,
        cachedir=args.cachedir,
        override_cache=args.override_cache,
        text_only=(args.model.lower() in ["bi-lstm", "bert"]),
        include_tfidf=args.include_tfidf,
        balanced=args.balanced)
    train_data, train_labels = train_data_all.X, train_data_all.y
    dev_data, dev_labels = dev_data_all.X, dev_data_all.y

    # Build model
    apx = get_appendix(args.include_tfidf, args.balanced)
    if args.model.lower() == "simple-ff":
        model = FeedForward(args.ff_hunits, train_data.shape[1])
        train_pytorch(args, model, train_data, train_labels, dev_data,
                      dev_labels, save_model_path=f"models/simple-ff{apx}.torch")
    elif args.model.lower() == "bi-lstm":
        model = BiLSTM(epochs=args.num_epochs, batch_size=args.batch_size,
                       max_seq_len=args.max_seq_len)
        model.train(train_data, train_labels, dev_data, dev_labels)
    elif args.model.lower() == "logreg":
        model = LogisticRegression()
        model.train(train_data, train_labels, dev_data, dev_labels,
                    save_model_path=f"models/logreg{apx}.pkl")
    elif args.model.lower() == "majority-vote":
        model = MajorityVote()
        model.train(train_labels, dev_labels)
    elif args.model.lower() == "bert":
        model = Bert(epochs=args.num_epochs, batch_size=args.batch_size,
                     max_seq_len=args.max_seq_len,
                     learning_rate=args.learning_rate)
        model.train(train_data, train_labels, dev_data, dev_labels,
                    save_model_path="models/bert.pkl")
    elif args.model.lower() == "svm":
        model = SVM()
        model.train(train_data, train_labels,
                    save_model_path=f"models/svm{apx}.sav")
    else:
        raise Exception("Unknown model type passed in!")
def build_model(model_name):
    if model_name == "dtree":
        return DTree()
    elif model_name == "svm":
        return SVM()
    else:
        print("No model")
        exit(-1)
def comparing_models(X_train, X_test, y_train, y_test):
    AdaBoost(X_train, X_test, y_train, y_test)
    Logistic_Regression(X_train, X_test, y_train, y_test)
    NaiveBayes(X_train, X_test, y_train, y_test)
    XGBoost(X_train, X_test, y_train, y_test)
    RandomForest(X_train, X_test, y_train, y_test)
    SVM(X_train, X_test, y_train, y_test)
    NeuralNetwork(X_train, X_test, y_train, y_test)
def one_iteraion(m):
    _models = [Perceptron(), SVM(), LDA()]
    X, y, _f = genrate_real_plane(m)
    ret = []
    for _model in _models:
        _model.fit(deepcopy(X), y)
        Z, _ = draw_points(k)
        ret.append(accur(_model, _f, Z))
    return np.array(ret)
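
# accur is not defined in this snippet; a plausible sketch, assuming `_f`
# returns the true labels for the drawn points and the model has `predict`:
def accur(model, _f, Z):
    y_true = _f(Z)             # labels from the true separating plane
    y_pred = model.predict(Z)  # model predictions on the fresh test points
    return np.mean(y_pred == y_true)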
def question14(x_train, y_train, x_test, y_test):
    all_m = [50, 100, 300, 500]
    repeat = 50
    n_neighbors = 4  # after checking some values, this turned out to be the best one
    max_depth = 8  # after checking some values, this turned out to be the best one
    models = [
        Logistic(),
        SVM(),
        DecisionTree(max_depth=max_depth),
        NearestNeighbors(n_neighbors=n_neighbors)
    ]
    models_names = [
        "Logistic",
        "SVM",
        "Decision Tree, depth=" + str(max_depth),
        "Nearest Neighbors, neighbors=" + str(n_neighbors)
    ]
    # models_names = ["Logistic", "SVM"]
    # models = [Logistic(), SVM()]
    # for i in range(3, 9):
    #     models.append(DecisionTree(max_depth=i))
    #     models_names.append("decision " + str(i))
    #     models.append(NearestNeighbors(n_neighbors=i))
    #     models_names.append("Neighbors " + str(i))
    mean_accuracies = np.empty((len(models), len(all_m)))
    accuracies = np.empty((len(models), repeat))
    running_time = np.zeros((len(models), len(all_m)))
    Z, yz = x_test, y_test  # test set
    for i, m in enumerate(all_m):
        for j in range(repeat):
            X, yx = draw_points_until_two_classes(x_train, y_train, m)  # training set
            for k, model in enumerate(models):
                start = time()
                model.fit(X, yx)
                accuracies[k, j] = model.model.score(Z, yz)
                end = time()
                running_time[k, i] += (end - start)
        mean_accuracies[:, i] = accuracies.mean(axis=1)
    colors = ['blue', 'red', 'green', 'orange']
    fig = plt.figure()
    for i, model in enumerate(models_names):
        # plt.plot(all_m, mean_accuracies[i, :], color=colors[i], label=model)
        plt.plot(all_m, mean_accuracies[i, :], label=model)
    plt.legend()
    fig.suptitle("Q14: mean accuracy as function of m")
    fig.savefig("q14.png", bbox_inches='tight', pad_inches=0.1, dpi=fig.dpi)
    running_time = running_time / repeat
    print(pd.DataFrame(running_time, models_names,
                       ["m=" + str(m) for m in all_m]))
def question9():
    perceptron = Perceptron()
    svm = SVM()
    fig = plt.figure()
    plt.suptitle("Q9: True vs. Perceptron vs. SVM hyperplanes")
    for i, m in enumerate(all_m):
        X, y = draw_points_until_two_classes(m)
        svm.fit(X, y)
        ax = fig.add_subplot(2, 3, i + 1)
        ax.scatter(X[y == -1, 0], X[y == -1, 1], color="blue",
                   label="y=-1")  # first class, labeled -1
        ax.scatter(X[y == 1, 0], X[y == 1, 1], color="red",
                   label="y=1")  # second class, labeled 1
        xmin, xmax = plt.xlim()
        xx = np.linspace(xmin, xmax)
        true_hyperplane = a * xx - (b / w[1])

        # perceptron
        perceptron.fit(X, y)
        w_perc = perceptron.model[:-1]
        a_perceptron = -w_perc[0] / w_perc[1]
        b_perceptron = perceptron.model[-1] / perceptron.model[1]
        perceptron_hyperplane = a_perceptron * xx - b_perceptron

        w_svm = svm.model.coef_[0]
        a_svm = -w_svm[0] / w_svm[1]
        b_svm = svm.model.intercept_[0] / w_svm[1]
        SVM_hyperplane = a_svm * xx - b_svm

        ax.plot(xx, true_hyperplane, color="black", label="true hyperplane")
        ax.plot(xx, perceptron_hyperplane, color="green",
                label="perceptron hyperplane")
        ax.plot(xx, SVM_hyperplane, color="orange", label="svm hyperplane")
        ax.title.set_text("m=" + str(m))
        if i == 0:
            plt.legend()
    fig.savefig("q9.png", bbox_inches='tight', pad_inches=0.3, dpi=fig.dpi)
def test(args):
    """
    This function tests our models
    :param args: the command line arguments with the desired actions
    """
    _, _, test_data_all = load(
        args.data_dir,
        cachedir=args.cachedir,
        override_cache=args.override_cache,
        text_only=(args.model.lower() in ["bi-lstm", "bert"]),
        include_tfidf=args.include_tfidf,
        balanced=args.balanced)
    test_data, test_labels = test_data_all.X, test_data_all.y

    apx = get_appendix(args.include_tfidf, args.balanced)
    if args.model.lower() == "simple-ff":
        preds = test_pytorch(test_data, test_labels,
                             load_model_path=f"models/simple-ff{apx}.torch",
                             predictions_file=f"preds/simple-ff-preds{apx}.txt")
    elif args.model.lower() == "bi-lstm":
        model = BiLSTM(load_model_path="models/bilstm.keras",
                       tokenizer_path='models/bilstm-tokenizer.json')
        preds = model.test(test_data, y_test=test_labels)
    elif args.model.lower() == "logreg":
        model = LogisticRegression(load_model_path=f"models/logreg{apx}.pkl")
        preds = model.test(test_data, test_labels,
                           save_predictions_path=f"preds/logreg-preds{apx}.txt")
    elif args.model.lower() == "majority-vote":
        model = MajorityVote(load_model_path="models/majority-class.txt")
        preds = model.test(test_labels)
    elif args.model.lower() == "bert":
        model = Bert(load_model_path="models/bert.pkl")
        preds = model.test(test_data, test_labels,
                           save_predictions_path="preds/bert-preds.txt")
    elif args.model.lower() == "svm":
        model = SVM(load_model_path=f"models/svm{apx}.sav")
        preds = model.test(test_data,
                           save_predictions_path=f"preds/svm-preds{apx}.txt")
    else:
        raise Exception("Unknown model type passed in!")

    metrics = classification_report(test_labels, preds, output_dict=True)
    pprint(metrics)
    with open(f"scores/{args.model.lower()}{apx}.json", "w") as fout:
        json.dump(metrics, fout, indent=4)
def one_iteraion(m):
    _models = [Logistic(), DecisionTree(), KNearestNeighbor(), SVM()]
    X, y, indexes = generate(m, x_train, y_train)
    # resample until both classes are present in the training labels
    while (0 not in y) or (1 not in y):
        X, y, indexes = generate(m, x_train, y_train)
    ret = []
    for _model in _models:
        start_time = time.time()
        _model.fit(deepcopy(X), y)
        elapsed_time = time.time() - start_time
        print("train : {} takes {}".format(_model, elapsed_time))
        Z, _, indexes = generate(k, x_test, y_test)
        ret.append(accur(_model, indexes, Z))
    return np.array(ret)
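
# `generate` is not shown here; a plausible sketch inferred from how it is
# called (sample m examples and also return the sampled indices):
def generate(m, x, y):
    indexes = np.random.choice(len(x), size=m, replace=False)
    return x[indexes], y[indexes], indexes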
def main(grid):
    # Get Clean Data
    X, Y = read_clean_data()

    # Linear Regression
    try:
        LinearRegression(X, Y, grid)
    except Exception as e:
        print(e)

    # Binarize Y
    Y_binary = BinaryY(Y)

    # Logistic Regression
    try:
        LogisticRegression(X, Y_binary, grid)
    except Exception as e:
        print(e)

    # Decision Tree
    try:
        DecisionTree(X, Y_binary, grid)
    except Exception as e:
        print(e)

    # Support Vector Machine
    try:
        SVM(X, Y_binary, grid)
    except Exception as e:
        print(e)

    # Random Forest
    try:
        RandomForest(X, Y_binary, grid)
    except Exception as e:
        print(e)

    # Bagging Classifier
    try:
        Bagging(X, Y_binary, grid)
    except Exception as e:
        print(e)

    # Neural Network
    try:
        NeuralNet(X, Y_binary, grid)
    except Exception as e:
        print(e)
def perform_svm_grid_search(self, c_svm, kernel_svm):
    acc_train_svm = {}
    acc_test_svm = {}
    progress_bar = tqdm(total=len(c_svm) * len(kernel_svm),
                        desc='Grid searching for best svm')
    for kernel in kernel_svm:
        acc_train_svm[kernel] = []
        acc_test_svm[kernel] = []
        for c in c_svm:
            acc1, acc2 = SVM.train_svm(self.train_data, self.validation_data,
                                       c, kernel, self.svm_type)
            log_message = "SVM kernel: {},\t SVM c parameter: {}\n".format(kernel, c)
            log_message += "Training accuracy: {},\t Validation accuracy: {}\n".format(acc1, acc2)
            util.logger(log_message, self.log_folder)
            acc_train_svm[kernel].append(acc1)
            acc_test_svm[kernel].append(acc2)
            progress_bar.update(1)

    for key in acc_train_svm.keys():
        plt.clf()
        plt.plot(c_svm, acc_train_svm[key], '.-', color='red')
        plt.plot(c_svm, acc_test_svm[key], '.-', color='orange')
        plt.xlabel('c')
        plt.ylabel('Accuracy')
        plt.title("Plot of accuracy vs c for training and validation data "
                  "for {} kernel".format(key))
        plt.grid()
        plot_save_path = os.path.join(self.plot_folder, "svm_{}.png".format(key))
        plt.savefig(plot_save_path)
        loss = loss_fn(y, y_hat)  # hinge loss
        # Computes gradients
        loss.backward()
        # Updates parameters and zeroes gradients
        optimizer.step()
        optimizer.zero_grad()
        # Returns the loss
        return loss.item()

    # Returns the function that will be called inside the train loop
    return train_step


def hinge_loss(y, y_hat):
    return torch.mean(torch.clamp(1 - y_hat * y, min=0))


model = SVM()  # Our model
optimizer = optim.Adam(model.parameters(), lr=learning_rate)  # Our optimizer
model.train()  # Our model, SVM is a subclass of nn.Module, so it inherits the train method

losses = []
val_losses = []
train_step = make_train_step(model, hinge_loss, optimizer)
train_per_epoch = int(len(train_set) / batch_size)

for epoch in range(n_epochs):
    sum_loss = 0
    sum_val_loss = 0
    kbar = pkbar.Kbar(target=train_per_epoch, width=8)
    for i, batch in enumerate(train_loader):
        # TODO: when we have CUDA:
        # x_batch = batch['imagePower'].to(device)
        # y_batch = batch['label'].to(device)
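
# The fragment above starts midway through make_train_step, and the SVM
# module itself is not shown. A minimal self-contained sketch of both for
# context, assuming a linear SVM over flattened inputs with labels in
# {-1, +1} (the feature size is a placeholder):
import torch
import torch.nn as nn


class SVM(nn.Module):
    def __init__(self, in_features=2):
        super().__init__()
        self.linear = nn.Linear(in_features, 1)  # scores w.x + b

    def forward(self, x):
        return self.linear(x)


def make_train_step(model, loss_fn, optimizer):
    def train_step(x, y):
        y_hat = model(x).squeeze(-1)
        loss = loss_fn(y, y_hat)  # hinge loss, as defined above
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        return loss.item()
    return train_step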
def run_models(words, models, verbose, train=True, test=True, embeddings=False):
    '''
    Runs all the models that are specified with the specified word set.
    It runs all preprocessing steps necessary for the models specified.
    Note: If a model is specified twice, it will be run twice, but the
    preprocessing on the input data will not (useful to test for model
    parameter initialization).

    Returns a list containing the objects of the models used, the outputs
    they predicted and the sklearn classification reports (dictionary
    format), in the order in which they were provided.

    Keyword arguments:
    words: list of list of words and features. Format: n*m. n=nr of words,
           m=nr features + expected output (single)
    models: a list of model-name strings. Order is not important. Possible
            models are: NB, LR, SVM, HMM, CRF. Coming soon: CNN.
            If a model is specified twice, it will be run twice. The input
            is randomized only once, where applicable.
    verbose: bitmask. 0: print nothing, 1: print results,
             2: print status messages, 3: print both
    '''
    # Preparing data for one-hot encoding -- converts strings into integers
    if any(i in models for i in ['NB', 'LR', 'SVM']):
        verbose & 2 and print('Initial pre-processing...')
        if embeddings:
            stems = [word[0] for word in words]
            words = [word[1:] for word in words]
        X, Y, transl, labels_num, labels_name = create_dataset(words)

    # Algorithm uses sentences (list of list of tuples): HMM
    if 'HMM' in models:
        verbose & 2 and print('Preprocessing data for HMM...')
        sentences_hmm, symbols, tag_set = words2tuples(words)
        _, y_train, _, y_test = split_tr([], sentences_hmm, 0.8)
        x_test = [[tup[0] for tup in sentence] for sentence in y_test]
        y_test = [[tup[1] for tup in sentence] for sentence in y_test]
        # shuffle_parallel(x_test, y_test)
        data_hmm = data_wrap(None, y_train, x_test, y_test)

    # Algorithms using shuffled, one-hot data: NB, LR, SVM
    if any(i in models for i in ['NB', 'LR', 'SVM']):
        verbose & 2 and print('Preprocessing data for NB, LR and/or SVM...')
        indexes = shuffle_parallel(X, Y)
        X_onehot_sh = one_hot(X, transl)
        if embeddings:
            verbose & 2 and print('Loading and generating embeddings...')
            X_onehot_sh = embeddings.insert_embeddings(X_onehot_sh, stems, indexes)
        x_train_oh_sh, y_train_oh_sh, x_test_oh_sh, y_test_oh_sh = split_tr(
            X_onehot_sh, Y, 0.8)
        data_shuffled = data_wrap(x_train_oh_sh, y_train_oh_sh, x_test_oh_sh,
                                  y_test_oh_sh, transl, labels_num, labels_name)

    # Ordered, using sentences (list of list of dict): CRF
    if 'CRF' in models:
        verbose & 2 and print('Preprocessing data for CRF...')
        tokens_dict, labels_dict = words2dictionary(words)
        shuffle_parallel(tokens_dict, labels_dict)
        tokens_train, labels_train, tokens_test, labels_test = split_tr(
            tokens_dict, labels_dict, 0.8)
        data_dictionary = data_wrap(tokens_train, labels_train, tokens_test,
                                    labels_test)

    model_objects = []
    model_results = []
    model_predictions = []

    # Removes clutter when calling the functions separately.
    # A list of function handlers could also be used, but I find that to be
    # less intuitive.
    def _add_to_output(model_y_pred):
        model_objects.append(model_y_pred[0])
        model_results.append(model_y_pred[1])
        if len(model_y_pred) > 2:
            model_predictions.append(model_y_pred[2])

    # Run each of the models from the parameters, while KEEPING THE ORDER
    # they were called in, and append it to the return lists
    for model in models:
        if 'HMM' in model:
            verbose & 2 and print('Running HMM from nltk...')
            _add_to_output(HMM(data_hmm, symbols, tag_set, verbose & 1))
        if 'NB' in model:
            verbose & 2 and print('Running NB ' +
                                  ('with ' if embeddings else 'without ') +
                                  'embeddings...')
            if embeddings:
                _add_to_output(NB_cont(data_shuffled, verbose & 1))
            else:
                _add_to_output(NB_disc(data_shuffled, verbose & 1))
        if 'LR' in model:
            verbose & 2 and print('Running LR ' +
                                  ('with ' if embeddings else 'without ') +
                                  'embeddings...')
            _add_to_output(LR(data_shuffled, verbose & 1,
                              C=(0.1 if embeddings else 5)))
        if 'SVM' in model:
            verbose & 2 and print('Running SVM ' +
                                  ('with ' if embeddings else 'without ') +
                                  'embeddings...')
            _add_to_output(SVM(data_shuffled, verbose & 1))
        if 'CRF' in model:
            verbose & 2 and print('Running CRF...')
            _add_to_output(CRF(data_dictionary, verbose & 1))

    return model_objects, model_results, model_predictions
with open(path_test_data, 'rb') as f:
    X_test = pickle.load(f)
    Y_test = pickle.load(f)
    test_fantom_labels = pickle.load(f)
    test_reconstr_labels = pickle.load(f)

# pad the inputs out to a fixed length of 32768 samples
X = np.ones((X_test.shape[0], 32768, 4), dtype=np.float16)
Y = np.ones((X_test.shape[0], 32768), dtype=np.float16)
X[:, 0:X_test.shape[1]] = X_test
X = np.reshape(X, (X.shape[0], X.shape[1], 4))

# load model
model = SVM((X.shape[1], 4))
# model = model_from_json(open('model.json').read())
model.load_weights(path_best_weights)

# predict
Y_pred = model.predict(X)
Y_pred = Y_pred[:, 0:25350]
Y_pred_hard = 2 * np.argmax(Y_pred, axis=-1) + 1
Y_pred_soft = 2 * Y_pred[:, :, 1] + 1

# place for predictions
predicted_phantoms_hard = sio.loadmat(path_predicted_phantoms_hard)
phantoms_data = predicted_phantoms_hard['PhantomDataBase']
print("Train set distribution:",
      preprocessing.samples_statistics(train_samples, _classes, get_question))
print("Test set distribution:",
      preprocessing.samples_statistics(test_samples, _classes, get_question))

train_texts = [sample.text for sample in train_samples]
test_texts = [sample.text for sample in test_samples]

train_matrix, test_matrix, words = preprocessing.preprocess(
    train_texts, test_texts, words_src="samples", normalize_flag=False)

if _model == "SVM":
    train_labels = preprocessing.samples_to_label(train_samples, _classes,
                                                  get_question)
    test_labels = preprocessing.samples_to_label(test_samples, _classes,
                                                 get_question)
    model = SVM()
    model.train(train_matrix, train_labels)
    predict = model.predict(test_matrix)
elif _model == "NN":
    train_dists = preprocessing.samples_to_dists(train_samples, _classes,
                                                 get_question)
    test_dists = preprocessing.samples_to_dists(test_samples, _classes,
                                                get_question)
    model = Neural_Network(_n_factors=train_matrix.shape[1],
                           _learning_rate=_learning_rate,
                           _hidden_nodes=_hidden_nodes,
                           _last_layer=len(_classes))
    model.train(train_matrix, train_dists, test_matrix, test_dists)
    predict = model.predict(test_matrix)
    predict = preprocessing.dists_to_labels(predict, _classes)
# Do SVM with Gaussian Kernel predictions
results0 = np.zeros(3000)
len_files = len(FILES)
for i in range(len_files):
    γ = gamma_list[i]
    λ = lambda_list[i]
    X_train, Y_train, X_test = load_data(i, data_dir=DATA_DIR,
                                         files_dict=FILES)
    kernel = GaussianKernel(γ)
    clf = SVM(_lambda=λ, kernel=kernel)
    clf.fit(X_train, Y_train)
    y_pred = clf.predict(X_test)
    results0[i * 1000:i * 1000 + 1000] = y_pred

# Save results
save_results("results_SVM_gaussian.csv", results0, RESULT_DIR)
print("1/3 Ending SVM with Gaussian kernel...")

#####################################
# 2) SVM with Convolutional kernel  #
#####################################
print("2/3 Starting SVM with Convolutional kernel...")

# Define parameter lists
sigma_list = [0.31, 0.31, 0.3]
k_list = [9, 10, 11]
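
# GaussianKernel is project code not shown here; a minimal sketch of the
# RBF Gram-matrix builder it plausibly resembles (the callable API is an
# assumption based on how it is passed to SVM above):
class GaussianKernel:
    def __init__(self, gamma):
        self.gamma = gamma

    def __call__(self, X1, X2):
        # ||x - z||^2 expanded as ||x||^2 + ||z||^2 - 2 x.z, vectorized
        sq_dists = (np.sum(X1 ** 2, axis=1)[:, None]
                    + np.sum(X2 ** 2, axis=1)[None, :]
                    - 2 * X1 @ X2.T)
        return np.exp(-self.gamma * sq_dists)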
def main(DATA_NUM, DATA_PATH, TRAIN_TIMES, N_LABELED, TEST_SIZE, N_CLASS):
    # read training data
    logger.info('Read kaggle training data')
    data = pd.read_csv(DATA_PATH, index_col=0)
    logger.info('The train shape : {}'.format(data.shape))

    # read testing data
    # logger.info('Read kaggle testing data')
    # testing_data_set = pd.read_csv(testing_data_path, index_col=0)
    # logger.info('The test shape : {}'.format(testing_data_set.shape))

    # Comparing UncertaintySampling strategy with RandomSampling.
    # model is the base learner, e.g. LogisticRegression, SVM ... etc.
    columns = list(data.columns)
    columns.remove('Class')
    columns.remove('Id')
    scaler = StandardScaler()
    results = []
    results_loss = []

    for T in range(TRAIN_TIMES):  # repeat the experiment T times
        logger.info("%dth experiment" % (T + 1))
        if DATA_NUM != 0:
            random_data = data.sample(n=DATA_NUM, weights='Class',
                                      random_state=T + 1)
            random_data.to_csv('random_data_sample.csv', encoding='utf-8')
            logger.info(random_data.head())
        else:
            random_data = copy.deepcopy(data)
            del data
            gc.collect()

        x = random_data.loc[:, columns].values
        x = scaler.fit_transform(x)
        x = np.concatenate((random_data['Id'].values.reshape(-1, 1), x), axis=1)
        y = random_data.loc[:, 'Class'].values
        trn_ds, tst_ds, y_train, fully_labeled_trn_ds = split_train_test(
            x, y, TEST_SIZE, N_LABELED, N_CLASS)
        lbr = IdealLabeler(fully_labeled_trn_ds)
        quota = len(y_train) - N_LABELED  # number of samples to query
        logger.info(quota)

        start_time = time.time()
        logger.info('Running ALBL')
        qs = ActiveLearningByLearning(
            trn_ds,
            query_strategies=[
                UncertaintySampling(trn_ds,
                                    model=SVM(kernel='linear',
                                              decision_function_shape='ovr')),
                QUIRE(trn_ds),
                RandomSampling(trn_ds)
                # HintSVM(trn_ds, cl=1.0, ch=1.0),  # only supports binary class
            ],
            T=quota,
            uniform_sampler=True,
            model=SVM(kernel='linear', decision_function_shape='ovr'))
        model = SVM(kernel='linear', decision_function_shape='ovr')
        _, E_out_5, loss = run(trn_ds, tst_ds, lbr, model, qs, quota, T)
        end_time = time.time()
        logger.info('ALBL finish. {} s'.format(end_time - start_time))
        results.append(E_out_5.tolist())
        results_loss.append(np.array(loss))

        # save albl error value and validation loss
        pickle.dump(results, open('albl_error.pkl', 'wb'))
        pickle.dump(results_loss, open('albl_Loss_record.pkl', 'wb'))

    # Plot the learning curve of UncertaintySampling to RandomSampling.
    # The x-axis is the number of queries, and the y-axis is the
    # corresponding error rate.
    fig = plt.figure()
    plt.plot(np.mean(results, axis=0), 'c', label='ALBL')
    plt.xlabel('Number of Queries')
    plt.ylabel('Error')
    fig.suptitle('ALBL Experiment Result')
    plt.legend(loc='upper right')
    # plt.xticks(np.arange(0, DATA_NUM//100, step=1))
    fig.savefig('./loss_figure/albl_loss_kaggle_malware.png')
    # plt.show()

    fig2 = plt.figure()
    plt.plot(np.mean(results_loss, axis=0), 'c', label='val_loss')
    plt.xlabel('Validation Times')
    plt.ylabel('Loss')
    fig2.suptitle('Validation Loss Result')
    plt.legend(loc='upper right')
    plt.xticks(np.arange(0, DATA_NUM // 100, step=1))
    fig2.savefig('./loss_figure/kaggle_loss.png')
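
# split_train_test is project code; a plausible sketch under the assumption
# that it wraps the data in libact Datasets, keeping only the first
# N_LABELED labels in the training pool (n_class is unused in this sketch):
from libact.base.dataset import Dataset
from sklearn.model_selection import train_test_split


def split_train_test(x, y, test_size, n_labeled, n_class):
    x_train, x_test, y_train, y_test = train_test_split(x, y,
                                                        test_size=test_size)
    # unlabeled entries are marked with None, as libact expects
    trn_ds = Dataset(x_train,
                     np.concatenate([y_train[:n_labeled],
                                     [None] * (len(y_train) - n_labeled)]))
    tst_ds = Dataset(x_test, y_test)
    fully_labeled_trn_ds = Dataset(x_train, y_train)
    return trn_ds, tst_ds, y_train, fully_labeled_trn_ds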
def test_everything(args):
    ## Get features, labels, training and testing set, adjacency
    args, file_names, stat_dirname, features, gt_labels, genres, adjacency, \
        indx_train, indx_test, pygsp_graph, release_dates = \
        load_parameters_and_data(args)

    if args.graph_statistics:
        if not os.path.exists(stat_dirname):
            os.makedirs(stat_dirname)
        if args.graph_statistics == 'all':
            ## Prints out all statistics about graph
            gstats.allstats(adjacency, stat_dirname, active_plots=False)
        elif args.graph_statistics == 'advanced':
            ## Prints out all advanced statistics
            gstats.advanced(adjacency, stat_dirname,
                            active_plots=args.plot_graph)
        else:  # basic setting
            ## Prints out basic statistics
            gstats.basic(adjacency)
        gstats.growth_analysis(adjacency, release_dates, gt_labels,
                               stat_dirname)

    if args.inductive_learning:
        print('#### Testing Inductive Learning ####')
        if args.additional_models:
            ## Initialize models with correct parameters
            svm_clf = SVM(features, gt_labels, kernel='linear', seed=SEED,
                          save_path=file_names)
            random_forest_clf = Random_Forest(features, gt_labels,
                                              n_estimators=100, max_depth=20,
                                              seed=SEED, save_path=file_names)
            knn_clf = KNN(features, gt_labels, save_path=file_names)

            error_svm = simple_test(svm_clf, indx_test, classes=genres,
                                    name=file_names + "svm_")
            print('* SVM simple test error: {:.2f}'.format(error_svm))
            error_rf = simple_test(random_forest_clf, indx_test,
                                   classes=genres, name=file_names + "rf_")
            print('* Random Forest simple test error: {:.2f}'.format(error_rf))
            error_knn = simple_test(knn_clf, indx_test, classes=genres,
                                    name=file_names + "knn_")
            print('* KNN simple test error: {:.2f}'.format(error_knn))

    if args.gcn:
        ## Initialize GCN with correct parameters
        gnn_clf = GCN(nhid=[1200, 100], dropout=0.1, adjacency=adjacency,
                      features=features, labels=gt_labels,
                      n_class=len(genres), cuda=args.use_cpu,
                      regularization=None, lr=0.01, weight_decay=5e-4,
                      epochs=300, batch_size=10000, save_path=file_names)
        error_gnn = simple_test(gnn_clf, indx_test, classes=genres,
                                name=file_names + "gnn_")
        print('* GCN simple test error: {:.2f}'.format(error_gnn))

    if args.gcn_khop:
        ## Initialize GCN K-Hop with correct parameters
        gnn_clf = GCN_KHop(nhid=[1200, 100], dropout=0.1,
                           adjacency=adjacency, features=features,
                           labels=gt_labels, n_class=len(genres), khop=2,
                           cuda=args.use_cpu, regularization=None, lr=0.01,
                           weight_decay=5e-4, epochs=300, batch_size=10000,
                           save_path=file_names)
        error_gnn = simple_test(gnn_clf, indx_test, classes=genres,
                                name=file_names + "gnn_khop_")
        print('* GCN KHop simple test error: {:.2f}'.format(error_gnn))

    if args.mlp_nn:
        ## Initialize MLP with correct parameters
        mlp_nn = MLP_NN(hidden_size=100, features=features, labels=gt_labels,
                        num_epoch=10, batch_size=100,
                        num_classes=len(genres), save_path=file_names,
                        cuda=args.use_cpu)
        error_mlpNN = simple_test(mlp_nn, indx_test, classes=genres,
                                  name=file_names + "mlpNN_")
        print('* MLP NN simple test error: {:.2f}'.format(error_mlpNN))
if __name__ == "__main__": train_data = [] train_label = [] load_data = [] for file in config.data_files: load_data.append(LoadData(file)) for cpt in range(len(load_data)): train_x, train_y = load_data[cpt].getTrainData() train_data += train_x train_label += train_y nb_model_nb = NaiveBayes(train_data, train_label) nb_model_svm = SVM(train_data, train_label) # Save Naive Bayes Model nb_pickle = open(config.naive_bayes_path, 'wb') pickle.dump(nb_model_nb, nb_pickle) nb_pickle.close() # Save SVM Model svm_pickle = open(config.SVM_path, 'wb') pickle.dump(nb_model_nb, svm_pickle) svm_pickle.close() valid_data = [] valid_label = [] for cpt in range(len(load_data)): valid_x, valid_y = load_data[cpt].getTestData()
# Paths to training and testing set
TRAINING_SET = '../resources/csv/training_set.csv'
TEST_SET = '../resources/csv/test_set.csv'

# Path to export predictions
DESTINATION = '../products/'

# Fingerprint transformation
FINGERPRINT = fingerprints.morgan()

# Model to train
MODEL = ConsensusClassifier([
    KNN(n_neighbors=17),
    MLP(random_state=0),
    SVM(gamma='auto', random_state=0, probability=True),
    RFC(500, random_state=0)
])

########
# Main #
########

if __name__ == '__main__':
    # Load training and test set
    LS = utils.load_from_csv(TRAINING_SET)
    TS = utils.load_from_csv(TEST_SET)

    # Create fingerprint features and output of learning set
    X_LS = fingerprints.transform(LS['SMILES'].values, FINGERPRINT)
    y_LS = LS['ACTIVE'].values
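
# ConsensusClassifier is project code; a minimal soft-voting sketch of what
# it plausibly does (the class internals and API are assumptions based on
# the usage above):
class ConsensusClassifier:
    def __init__(self, estimators):
        self.estimators = estimators

    def fit(self, X, y):
        for est in self.estimators:
            est.fit(X, y)
        return self

    def predict_proba(self, X):
        # average the per-model class probabilities
        return np.mean([est.predict_proba(X) for est in self.estimators],
                       axis=0)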
def __init__(self):
    self.resource_folder = get_resource_path()
    # for dataset_name in sorted(os.listdir(folder)):
    #     if dataset_name.endswith('.csv'):
    #         print(dataset_name[:-4])
    self.pipelines = {
        'credit-g': ('credit-g/dataset_31_credit-g.csv', 'class',
                     CreditGPipeline()),
        'wine-quality': ('wine-quality/wine-quality-red.csv', 'class',
                         WineQualityPipeline()),
        'wq-missing': ('wine-quality/wine-quality-red.csv', 'class',
                       WineQualityMissingPipeline()),
        'abalone': ('abalone/abalone.csv', 'Rings', AbalonePipeline()),
        'adult': ('adult/adult.csv', 'class', AdultPipeline()),
        'adult-missing': ('adult/adult.csv', 'class', AdultMissingPipeline()),
        'heart': ('heart/heart.csv', 'class', HeartPipeline())}
    self.classifiers = {
        'dtc': DecisionTree(),
        'rfc40': RandomForest(size=40),
        'ertc40': ExtremelyRandomizedTrees(size=40),
        'xgb': XGB(),
        'svm': SVM(),
        'lsvm': LinearSVM(),
        'knn': KNN(n_neighbors=7),
        'logreg': LogRegression(),
        'gaus': GausNB(),
        'brfc40': BaggingRandomForest(size=40),
        'mlpc': MLPC(input_size=[16, 32, 16, 8])}
    self.error_gens = {
        'numeric anomalies': (Anomalies(),
                              lambda x: x.dtype in [DataType.INTEGER,
                                                    DataType.FLOAT]),
        'typos': (Typos(), lambda x: x.dtype == DataType.STRING),
        'explicit misvals': (ExplicitMissingValues(), lambda x: True),
        'implicit misvals': (ImplicitMissingValues(), lambda x: True),
        'swap fields': (SwapFields(), lambda x: True)}
    self.params = [0.01, 0.05, 0.1, 0.2, 0.3, 0.5, 0.8]
    self.tests = {
        'num disc': lambda x: (x.scale == DataScale.NOMINAL
                               and x.dtype in [DataType.INTEGER,
                                               DataType.FLOAT]),
        'num cont': lambda x: (x.scale == DataScale.NOMINAL
                               and x.dtype in [DataType.INTEGER,
                                               DataType.FLOAT]),
        'string': lambda x: x.dtype == DataType.STRING}
    self.results = Table(rows=sorted(self.pipelines.keys()),
                         columns=sorted(self.classifiers.keys()),
                         subrows=self.tests.keys(),
                         subcolumns=self.error_gens.keys())
print("Train set distribution:", preprocessing.samples_statistics(train_samples, _sections, get_section)) print("Test set distribution:", preprocessing.samples_statistics(test_samples, _sections, get_section)) train_texts = [sample.text for sample in train_samples] test_texts = [sample.text for sample in test_samples] tfidf_vectorizer = get_tfidfVectorizer_of_essay_top_tf_words() print("Vectorizer built..") train_matrix, test_matrix, words = preprocessing.preprocess(train_texts, test_texts, savedir = _save_dir, words_src = tfidf_vectorizer, normalize_flag = False, reduction = _reduction, reduce_n_attr = _reduce_n_attr, stem_words = _stem_words) model = None print("Generating labels..") if _model == "SVM": train_labels = preprocessing.samples_to_label(train_samples, _sections, get_section) test_labels = preprocessing.samples_to_label(test_samples, _sections, get_section) model = SVM() print("Training.. ") model.train(train_matrix, train_labels) predict = model.predict(test_matrix) elif _model == "NN": train_dists = preprocessing.samples_to_dists(train_samples, _sections, get_section) test_dists = preprocessing.samples_to_dists(test_samples, _sections, get_section) model = Neural_Network(_n_factors = train_matrix.shape[1], _learning_rate = _learning_rate, _hidden_nodes = _hidden_nodes, _last_layer = len(_sections)) print("Training.. ") model.train(train_matrix, train_dists, test_matrix, test_dists, max_iter = _max_iter) predict = model.predict(test_matrix) predict = preprocessing.dists_to_labels(predict, _sections) test_labels = preprocessing.samples_to_label(test_samples, _sections, get_section) else:
def analyze_clssifiers():
    # def generate_line_prec(prec):
    def plot(prec, _svm, X, y):
        plt.scatter(X[y == -1][:, 0], X[y == -1][:, 1])
        plt.scatter(X[y == 1][:, 0], X[y == 1][:, 1])
        min_x, max_x = min(X[:, 0]), max(X[:, 0])
        min_y, max_y = min(X[:, 1]), max(X[:, 1])
        print(min_x, max_x)
        _min_range, _max_range = min(min_x, min_y), max(max_x, max_y)
        print(prec.W)
        xx = [_min_range, _max_range]

        def get_y(W, _x):
            return -(W[0] + _x * W[1]) / W[2] if W[2] != 0 else -W[0]

        def get_y_prep(_x):
            return get_y(prec.W, _x)

        def get_y_svm(_x):
            print(_svm.coef_()[0])
            return get_y(_svm.coef_()[0], _x)

        def get_true_y(_x):
            return 0.1 / 0.5 + 0.3 / 0.5 * _x

        plt.xlim([_min_range, _max_range])
        plt.ylim([_min_range, _max_range])
        middle = (_min_range + _max_range) / 2

        def print_line(msg, _f, _color):
            xx = [_min_range, _max_range]
            yy = [_f(_x) for _x in xx]
            _x = middle + 2 * (0.5 - random())
            _y = _f(_x)
            plt.plot(xx, yy, color=_color)
            plt.annotate(msg, color=_color, xy=(_x, _y), xycoords='data',
                         xytext=(_x + 0.3, _y), textcoords='data',
                         arrowprops=dict(arrowstyle="->"))

        print_line("prep", get_y_prep, "C5")
        print_line("svm", get_y_svm, "C4")
        print_line("true plane", get_true_y, "C2")
        plt.title("svm vs prep")
        plt.xlabel("x")
        plt.ylabel("y")
        plt.show()

    for m in [5, 10, 15, 25, 70]:
        X, y = draw_points(m)
        blues, reds = X[y == 1], X[y == -1]
        _models = [Perceptron(), SVM()]
        for _model in _models:
            _model.fit(deepcopy(X), y)
        plot(_models[0], _models[1], X, y)
    # test the model with linear features
    y_hat = mdl.predict(x_norm_valid)

    # get metrics
    recall, precision, f1 = metrics(y_valid, y_hat)
    save_result(t, 'lr', 'linear', recall, precision, f1, C=None,
                gamma=p['gamma'])

    # print result
    t_elapsed = time.time() - t_start
    print('Logistic Regression w/ gamma = {:.2e}'.format(p['gamma']) +
          ' | Precision = {:.4f}, Recall = {:.4f}, F1 = {:.4f}, '.format(
              precision, recall, f1) +
          ' | Time = {:.2f} seconds'.format(t_elapsed))

# loop over svm model parameter space
mdl = SVM()
params = mdl.hyper_parameters()
for p in params:
    # train the model with linear features
    t_start = time.time()
    success = mdl.train(x_norm_train, y_train, C=p['C'], mode='primal')

    # did we succeed?
    if success:
        # test the model
        y_hat = mdl.predict(x_norm_valid)

        # get metrics
        recall, precision, f1 = metrics(y_valid, y_hat)
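
# A sketch of the metrics helper used above, assuming binary labels and the
# standard sklearn definitions of the three scores:
from sklearn.metrics import f1_score, precision_score, recall_score


def metrics(y_true, y_pred):
    recall = recall_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)
    return recall, precision, f1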
def train_everything(args):
    ## Get features, labels, training and testing set, adjacency
    args, file_names, stat_dirname, features, gt_labels, genres, adjacency, \
        indx_train, indx_test, pygsp_graph, release_dates = \
        load_parameters_and_data(args)

    if args.inductive_learning:
        print('#### Applying Inductive Learning ####')
        if args.additional_models:
            ## Initialize models with correct parameters
            svm_clf = SVM(features, gt_labels, kernel='linear', seed=SEED,
                          save_path=file_names)
            random_forest_clf = Random_Forest(features, gt_labels,
                                              n_estimators=100, max_depth=20,
                                              seed=SEED, save_path=file_names)
            knn_clf = KNN(features, gt_labels, save_path=file_names)

            start = time.time()
            mean_error_svm, std_error_svm = cross_validation(
                svm_clf, indx_train, K=5, classes=genres,
                name=file_names + "svm_")
            print('* SVM cross validation error mean: {:.2f}, std: {:.2f}'.format(
                mean_error_svm, std_error_svm))
            print("SVM time", time.time() - start)

            start = time.time()
            mean_error_rf, std_error_rf = cross_validation(
                random_forest_clf, indx_train, K=5, classes=genres,
                name=file_names + "rf_")
            print('* Random Forest cross validation error mean: {:.2f}, std: {:.2f}'.format(
                mean_error_rf, std_error_rf))
            print("RF time", time.time() - start)

            start = time.time()
            mean_error_knn, std_error_knn = cross_validation(
                knn_clf, indx_train, K=5, classes=genres,
                name=file_names + "knn_")
            print('* KNN cross validation error mean: {:.2f}, std: {:.2f}'.format(
                mean_error_knn, std_error_knn))
            print("KNN time", time.time() - start)

    if args.gcn:
        print("Training GCN")
        start = time.time()
        ## Initialize GCN with correct parameters
        gnn_clf = GCN(nhid=[1200, 100], dropout=0.1, adjacency=adjacency,
                      features=features, labels=gt_labels,
                      n_class=len(genres), cuda=args.use_cpu,
                      regularization=None, lr=0.01, weight_decay=5e-4,
                      epochs=300, batch_size=10000, save_path=file_names)
        train_gcn(gnn_clf, indx_train, name=file_names + "gnn_")
        print("GCN time", time.time() - start)

    if args.gcn_khop:
        print("Training GCN K-Hop")
        start = time.time()
        ## Initialize GCN K-Hop with correct parameters
        gnn_clf = GCN_KHop(nhid=[1200, 100], dropout=0.1,
                           adjacency=adjacency, features=features,
                           labels=gt_labels, n_class=len(genres), khop=2,
                           cuda=args.use_cpu, regularization=None, lr=0.01,
                           weight_decay=5e-4, epochs=300, batch_size=10000,
                           save_path=file_names)
        train_gcn(gnn_clf, indx_train, name=file_names + "gnn_khop_")
        print("GCN K-Hop time", time.time() - start)

    if args.mlp_nn:
        start = time.time()
        ## Initialize MLP with correct parameters
        mlp_nn = MLP_NN(hidden_size=100, features=features, labels=gt_labels,
                        num_epoch=100, batch_size=100,
                        num_classes=len(genres), save_path=file_names,
                        cuda=args.use_cpu)
        mean_error_mlpNN, std_error_mlpNN = cross_validation(
            mlp_nn, indx_train, K=5, classes=genres,
            name=file_names + "mlpNN_")
        print('* MLP NN cross validation error mean: {:.2f}, std: {:.2f}'.format(
            mean_error_mlpNN, std_error_mlpNN))
        print("MLP time", time.time() - start)
train_samples = samples[0:int(n_samples * _train_ratio)]
test_samples = samples[int(n_samples * _train_ratio):n_samples]

print("Samples distribution:",
      preprocessing.samples_statistics(samples, _classes, get_question))
print("Train set distribution:",
      preprocessing.samples_statistics(train_samples, _classes, get_question))
print("Test set distribution:",
      preprocessing.samples_statistics(test_samples, _classes, get_question))

train_texts = [sample.text for sample in train_samples]
test_texts = [sample.text for sample in test_samples]

train_matrix, test_matrix, words = preprocessing.preprocess(
    train_texts, test_texts, words_src="samples", normalize_flag=False)

if _model == "SVM":
    train_labels = preprocessing.samples_to_label(train_samples, _classes,
                                                  get_question)
    test_labels = preprocessing.samples_to_label(test_samples, _classes,
                                                 get_question)
    model = SVM()
    model.train(train_matrix, train_labels)
    predict = model.predict(test_matrix)
elif _model == "NN":
    train_dists = preprocessing.samples_to_dists(train_samples, _classes,
                                                 get_question)
    test_dists = preprocessing.samples_to_dists(test_samples, _classes,
                                                get_question)
    model = Neural_Network(_n_factors=train_matrix.shape[1],
                           _learning_rate=_learning_rate,
                           _hidden_nodes=_hidden_nodes,
                           _last_layer=len(_classes))
    model.train(train_matrix, train_dists, test_matrix, test_dists)
    predict = model.predict(test_matrix)
    predict = preprocessing.dists_to_labels(predict, _classes)
    test_labels = preprocessing.samples_to_label(test_samples, _classes,
                                                 get_question)
else:
    raise Exception("Unknown model flag '%s'" % str(_model))
           mosaic(20, images))
cv2.imwrite('out/test_set.jpg', mosaic(20, shoes_test))
cv2.imwrite('out/train_set.jpg', mosaic(20, shoes_train))

print('training KNearest...')
model = KNearest(k=4)
model.train(samples_train, labels_train)
vis, knearestError = evaluate_model(model, shoes_test, samples_test,
                                    labels_test)
cv2.imwrite('out/KNearest_test_' + str(SZ) + '.jpg', vis)
# print('saving KNearest as "shoes_KNearest_' + str(SZ) + '.dat"...')
# model.save('out/shoes_KNearest_' + str(SZ) + '.dat')

print('training SVM...')
model = SVM(C=2.67, gamma=5.383)
model.train(samples_train, labels_train)
vis, svmError = evaluate_model(model, shoes_test, samples_test, labels_test)
cv2.imwrite('out/SVM_test_' + str(SZ) + '.jpg', vis)
print('saving SVM as "shoes_svm_' + str(SZ) + '.dat"...')
model.save('out/shoes_svm_' + str(SZ) + '.dat')

print('training RTrees...')
model = RTrees()
model.train(samples_train, labels_train)
vis, rtreesError = evaluate_model(model, shoes_test, samples_test,
                                  labels_test)
cv2.imwrite('out/rtrees_test_' + str(SZ) + '.jpg', vis)
print('saving RTrees as "shoes_rtrees_' + str(SZ) + '.dat"...')
model.save('out/shoes_rtrees_' + str(SZ) + '.dat')
    test_texts,
    savedir=_save_dir,
    words_src=tfidf_vectorizer,
    normalize_flag=False,
    reduction=_reduction,
    reduce_n_attr=_reduce_n_attr,
    stem_words=_stem_words)

model = None
print("Generating labels..")
if _model == "SVM":
    train_labels = preprocessing.samples_to_label(train_samples, _sections,
                                                  get_section)
    test_labels = preprocessing.samples_to_label(test_samples, _sections,
                                                 get_section)
    model = SVM()
    print("Training..")
    model.train(train_matrix, train_labels)
    predict = model.predict(test_matrix)
elif _model == "NN":
    train_dists = preprocessing.samples_to_dists(train_samples, _sections,
                                                 get_section)
    test_dists = preprocessing.samples_to_dists(test_samples, _sections,
                                                get_section)
    model = Neural_Network(_n_factors=train_matrix.shape[1],
                           _learning_rate=_learning_rate,
                           _hidden_nodes=_hidden_nodes,
                           _last_layer=len(_sections))
    print("Training..")
    model.train(train_matrix,
best_window_size = {i: 0 for i in range(len_files)}

# Main loop
for _, params in enumerate(settings):
    gamma, _lambda = params
    if kernel_name == "Gaussian":
        kernel = GaussianKernel(gamma)
    elif kernel_name == "Linear":
        kernel = LinearKernel()
    if model_name == "SVM":
        clf = SVM(_lambda=_lambda, kernel=kernel)
    elif model_name == "SPR":
        clf = SPR(kernel=kernel)

    # Loop over pre-computed embeddings
    # for filename in os.listdir(EMBEDDING_DIR)[:1]:  # small test
    for filename in os.listdir(EMBEDDING_DIR):
        # Full path
        file_path = os.path.join(EMBEDDING_DIR, filename)
        # Parsing
        dataset_idx, sigma, window_size = filename_parser(filename)
        # Cross validation
        results = cross_validation(dataset_idx=dataset_idx,
                                   clf=clf,
                                   data_dir=DATA_DIR,
                                   files_dict=FILES,
X = np.ones((X_train.shape[0], 32768, 4), dtype=np.float16)
Y = np.ones((Y_train.shape[0], 32768), dtype=np.float16)

# normalize data
# X_train = (X_train - 1) / 2.0
# Y_train = (Y_train - 1) / 2.0

# pad the training pairs out to the fixed length of 32768 samples
X[:, 0:X_train.shape[1]] = X_train
Y[:, 0:Y_train.shape[1]] = Y_train
Y = to_categorical(Y)
X = np.reshape(X, (X.shape[0], X.shape[1], 4))

# train
model = SVM((X.shape[1], 4))
# workaround for a Keras/TF issue where the TensorBoard callback expects
# _get_distribution_strategy on the model
model._get_distribution_strategy = lambda: None
json_string = model.to_json()
open('model.json', 'w').write(json_string)
checkpointer = ModelCheckpoint(path_best_weights, verbose=1,
                               monitor='val_loss', mode='auto',
                               save_best_only=True)
tbCallback = TensorBoard(log_dir='./logs', histogram_freq=0,
                         write_graph=True, write_images=True,
                         profile_batch=100000000)
model.fit(X, Y,