Example #1
def main():
    distance_funcs = {
        'euclidean': Distances.euclidean_distance,
        'minkowski': Distances.minkowski_distance,
        'cosine_dist': Distances.cosine_similarity_distance,
    }

    scaling_classes = {
        'min_max_scale': MinMaxScaler,
        'normalize': NormalizationScaler,
    }

    x_train, y_train, x_val, y_val, x_test, y_test = data_processing()

    print('x_train shape = ', x_train.shape)
    print('y_train shape = ', y_train.shape)

    # tuner_without_scaling_obj = HyperparameterTuner()
    # tuner_without_scaling_obj.tuning_without_scaling(distance_funcs, x_train, y_train, x_val, y_val)
    #
    # print("**Without Scaling**")
    # print("k =", tuner_without_scaling_obj.best_k)
    # print("distance function =", tuner_without_scaling_obj.best_distance_function)

    tuner_with_scaling_obj = HyperparameterTuner()
    tuner_with_scaling_obj.tuning_with_scaling(distance_funcs, scaling_classes, x_train, y_train, x_val, y_val)

    print("\n**With Scaling**")
    print("k =", tuner_with_scaling_obj.best_k)
    print("distance function =", tuner_with_scaling_obj.best_distance_function)
    print("scaler =", tuner_with_scaling_obj.best_scaler)
Example #2
def main():
    distance_funcs = {
        'euclidean': Distances.euclidean_distance,
        'minkowski': Distances.minkowski_distance,
        'gaussian': Distances.gaussian_kernel_distance,
        'inner_prod': Distances.inner_product_distance,
        'cosine_dist': Distances.cosine_similarity_distance,
    }

    scaling_classes = {
        'min_max_scale': MinMaxScaler,
        'normalize': NormalizationScaler,
    }

    x_train, y_train, x_val, y_val, x_test, y_test = data_processing()

    print('x_train shape = ', x_train.shape)
    print('y_train shape = ', y_train.shape)
    print('x_val shape = ', x_val.shape)
    print('y_val shape = ', y_val.shape)
    print('x_test shape = ', x_test.shape)
    print('y_test shape = ', y_test.shape)

    tuner_without_scaling_obj = HyperparameterTuner()
    tuner_without_scaling_obj.tuning_without_scaling(distance_funcs, x_train, y_train, x_val, y_val)

    print("**Without Scaling**")
    print("k =", tuner_without_scaling_obj.best_k)
    print("distance function =", tuner_without_scaling_obj.best_distance_function)
    print("f1_score=", tuner_without_scaling_obj.best_f1_score)
    pred = tuner_without_scaling_obj.best_model.predict(x_test)
    correct = 0
    for i in range(len(pred)):
        if pred[i] == y_test[i]:
            correct += 1
    accuracy = float(correct)/len(pred)
    print ("Accuracy is: ", accuracy)
    print ("F1 Score: ", f1_score(y_test, pred))

    tuner_with_scaling_obj = HyperparameterTuner()
    tuner_with_scaling_obj.tuning_with_scaling(distance_funcs, scaling_classes, x_train, y_train, x_val, y_val)
    print("\n**With Scaling**")
    print("k =", tuner_with_scaling_obj.best_k)
    print("distance function =", tuner_with_scaling_obj.best_distance_function)
    print("scaler =", tuner_with_scaling_obj.best_scaler)
    print("f1_score=", tuner_with_scaling_obj.best_f1_score)
    pred_2 = tuner_with_scaling_obj.best_model.predict(x_test)
    correct_2 = 0
    for i in range(len(pred_2)):
        if pred_2[i] == y_test[i]:
            correct_2 += 1
    accuracy_2 = float(correct_2)/len(pred_2)
    print ("Accuracy is: ", accuracy_2)
    print ("F1 Score:", f1_score(y_test, pred_2))
Example #3
def main():

    x_train, y_train, x_val, y_val, x_test, y_test = data_processing()

    print('x_train shape = ', x_train.shape)
    print('y_train shape = ', y_train.shape)

    scaler = MinMaxScaler()
    x_train = scaler(x_train)
    x_val = scaler(x_val)
    # Reuse the same fitted scaler: a fresh MinMaxScaler would rescale
    # x_test with its own min/max instead of the training statistics.
    x_test = scaler(x_test)
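
The fix above assumes the scaler is stateful. A minimal sketch of such a MinMaxScaler (hypothetical; the actual utils class may differ) that records the per-feature min/max on the first call and reuses them afterwards:

import numpy as np

class MinMaxScalerSketch:
    def __init__(self):
        self.low = None
        self.high = None

    def __call__(self, features):
        # First call: remember the per-feature min/max. Later calls reuse
        # them, so validation/test data are scaled with training statistics.
        features = np.asarray(features, dtype=float)
        if self.low is None:
            self.low = features.min(axis=0)
            self.high = features.max(axis=0)
        span = np.where(self.high == self.low, 1.0, self.high - self.low)
        return ((features - self.low) / span).tolist()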
Example #4
def main():
    distance_funcs = {
        'euclidean': Distances.euclidean_distance,
        'minkowski': Distances.minkowski_distance,
        'gaussian': Distances.gaussian_kernel_distance,
        'inner_prod': Distances.inner_product_distance,
        'cosine_dist': Distances.cosine_similarity_distance,
    }

    scaling_classes = {
        'min_max_scale': MinMaxScaler,
        'normalize': NormalizationScaler,
    }

    x_train, y_train, x_val, y_val, x_test, y_test = data_processing()

    print('x_train shape = ', x_train.shape)
    print('y_train shape = ', y_train.shape)
    print(list(distance_funcs.keys()).index('cosine_dist'))
    print(x_train[0])
    print(x_train[1])
    print(Distances.euclidean_distance(x_train[0], x_train[1]))
    print(Distances.minkowski_distance(x_train[0], x_train[1]))
    print(Distances.inner_product_distance(x_train[0], x_train[1]))
    print(Distances.cosine_similarity_distance(x_train[0], x_train[1]))
    print(Distances.gaussian_kernel_distance(x_train[0], x_train[1]))
    # Toy 1-D sanity check: train on the small hand-made set so the 1-D
    # queries below match the training dimensionality.
    a = [[1], [4], [4], [10], [11], [23], [0], [50]]
    b = [[3, 4], [1, -1], [0, 0]]  # unused here
    labels = [0, 0, 0, 1, 1, 1, 0, 1]
    k = KNN(4, Distances.euclidean_distance)
    k.train(a, labels)
    print(k.get_k_neighbors([2]))
    print(k.get_k_neighbors([28]))
    print(k.predict([[2], [28]]))

    tuner_without_scaling_obj = HyperparameterTuner()
    tuner_without_scaling_obj.tuning_without_scaling(distance_funcs, x_train, y_train, x_val, y_val)

    print("**Without Scaling**")
    print("k =", tuner_without_scaling_obj.best_k)
    print("distance function =", tuner_without_scaling_obj.best_distance_function)
    x = NormalizationScaler()

    tuner_with_scaling_obj = HyperparameterTuner()
    tuner_with_scaling_obj.tuning_with_scaling(distance_funcs, scaling_classes, x_train, y_train, x_val, y_val)

    print("\n**With Scaling**")
    print("k =", tuner_with_scaling_obj.best_k)
    print("distance function =", tuner_with_scaling_obj.best_distance_function)
    print("scaler =", tuner_with_scaling_obj.best_scaler)
Example #5
def main():
    distance_funcs = {
        'canberra': Distances.canberra_distance,
        'minkowski': Distances.minkowski_distance,
        'euclidean': Distances.euclidean_distance,
        'gaussian': Distances.gaussian_kernel_distance,
        'inner_prod': Distances.inner_product_distance,
        'cosine_dist': Distances.cosine_similarity_distance,
    }

    x_train, y_train, x_val, y_val, x_test, y_test = data_processing()

    print('x_train shape = ', x_train.shape)
    print('y_train shape = ', y_train.shape)

    classifier = KNN(k=5, distance_function=distance_funcs['canberra'])
    classifier.train(x_train, y_train)
    pred = classifier.predict(x_val)
    s = sum(1 for p, y in zip(pred, y_val) if p == y)
    print('validation accuracy =', s / len(pred))
Example #6
def main():
    distance_funcs = {
        'euclidean': Distances.euclidean_distance,
        'minkowski': Distances.minkowski_distance,
        'gaussian': Distances.gaussian_kernel_distance,
        'inner_prod': Distances.inner_product_distance,
        'cosine_dist': Distances.cosine_similarity_distance,
    }

    scaling_classes = {
        'min_max_scale': MinMaxScaler,
        'normalize': NormalizationScaler,
    }

    x_train, y_train, x_val, y_val, x_test, y_test = data_processing()

    print('x_train shape = ', x_train.shape)
    print('y_train shape = ', y_train.shape)

    knn = KNN(11, Distances.euclidean_distance)
    knn.train(x_train, y_train)
    knn.predict(x_val)
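
For context, a minimal KNN with this interface could look like the sketch below (hypothetical; the homework's KNN may differ, e.g. in tie-breaking):

import numpy as np

class KNNSketch:
    def __init__(self, k, distance_function):
        self.k = k
        self.distance_function = distance_function

    def train(self, features, labels):
        # Lazy learner: just memorize the training data.
        self.features = features
        self.labels = labels

    def get_k_neighbors(self, point):
        # Labels of the k training points closest to `point`.
        dists = [self.distance_function(point, f) for f in self.features]
        order = np.argsort(dists)[:self.k]
        return [self.labels[i] for i in order]

    def predict(self, features):
        # Majority vote among each point's k nearest neighbors.
        preds = []
        for p in features:
            neighbors = self.get_k_neighbors(p)
            preds.append(max(set(neighbors), key=neighbors.count))
        return preds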
Example #7
 def train_model(self, **kwargs):
     # Pass in the training/testing datasets. This has to happen on every
     # call because the training data might change.
     data_dict = data_processing()
     X_train = data_dict['X_train']
     y_train = data_dict['y_train']
     X_test = data_dict['X_test']
     y_test = data_dict['y_test']
     # Build the model based on the model name given by the user.
     model = self.model_func
     model.set_params(**kwargs)
     model.fit(X_train, y_train)
     y_pred = model.predict(X_test)
     self.params = kwargs
     self.accuracy_score = accuracy_score(y_test, y_pred)
     self.feature_importance = model.feature_importances_
     self.version += 1
     self.model_func = model
     # Pickle the object, which includes the model and its current
     # accuracy_score.
     try:
         with open(self.picklefile_name, 'ab') as f:
             pickle.dump(self, f)
     except OSError:
         raise Exception(
             f'Problem occurred when writing to {self.picklefile_name}')
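
Hypothetical usage of train_model, assuming a wrapper class (here called ModelTrainer, an assumed name) that carries model_func, picklefile_name, and version as in the snippet; RandomForestClassifier is only an example estimator that supports both set_params and feature_importances_:

from sklearn.ensemble import RandomForestClassifier

trainer = ModelTrainer()                       # assumed wrapper class
trainer.model_func = RandomForestClassifier()  # any estimator with feature_importances_
trainer.picklefile_name = 'rf_model.pkl'
trainer.version = 0
trainer.train_model(n_estimators=200, max_depth=5)
print(trainer.params, trainer.accuracy_score)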
Example #8
def test_transform_model_selection():
    Xtrain, ytrain, Xval, yval, Xtest, ytest = data_processing()
    model_selection_with_transformation(distance_funcs, scaling_classes,
                                        Xtrain.tolist(), ytrain.tolist(),
                                        Xval.tolist(), yval.tolist())
Example #9
import tensorflow as tf
import model as ml
import data
from configs import DEFINES

# Build the vocabulary.
char2idx, idx2char, vocabulary_length = data.load_vocabulary()

# Load the training and test data.
train_input, train_label, eval_input, eval_label = data.load_data()

# Training data: build the encoder input, decoder input, and decoder target.
train_input_enc = data.data_processing(train_input, char2idx,
                                       DEFINES.enc_input)
train_input_dec = data.data_processing(train_label, char2idx,
                                       DEFINES.dec_input)
train_target_dec = data.data_processing(train_label, char2idx,
                                        DEFINES.dec_target)

# Evaluation data: build the encoder input, decoder input, and decoder target.
eval_input_enc = data.data_processing(eval_input, char2idx, DEFINES.enc_input)
eval_input_dec = data.data_processing(eval_label, char2idx, DEFINES.dec_input)
eval_target_dec = data.data_processing(eval_label, char2idx,
                                       DEFINES.dec_target)

# Configure the estimator.
classifier = tf.estimator.Estimator(model_fn=ml.Model,
                                    model_dir=DEFINES.check_point_path,
                                    params={
                                        'embedding_size':
                                        DEFINES.embedding_size,
Example #10
# for auto-reloading external modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython

import numpy as np
from hw1_knn import KNN
from utils import euclidean_distance, gaussian_kernel_distance, inner_product_distance, cosine_sim_distance
from utils import f1_score, model_selection_without_normalization, model_selection_with_transformation
distance_funcs = {
    'euclidean': euclidean_distance,
    'gaussian': gaussian_kernel_distance,
    'inner_prod': inner_product_distance,
    'cosine_dist': cosine_sim_distance,
}

from data import data_processing
Xtrain, ytrain, Xval, yval, Xtest, ytest = data_processing()

#best_model, best_k, best_function = model_selection_without_normalization(distance_funcs, Xtrain, ytrain, Xval, yval)

from utils import NormalizationScaler, MinMaxScaler

scaling_classes = {
    'min_max_scale': MinMaxScaler,
    'normalize': NormalizationScaler,
}

#best_model, best_k, best_function, best_scaler = model_selection_with_transformation(distance_funcs, scaling_classes, Xtrain, ytrain, Xval, yval)

import data
import hw1_dt as decision_tree
import utils as Utils
Example #11
 def test_classify(model):
     from data import data_processing
     
     Xtrain, ytrain, Xval, yval, Xtest, ytest = data_processing()
     model.train(Xtrain, ytrain)
     predicted_labels = model.predict(Xtest)
Example #12
        'model_hidden_size': DEFINES.model_hidden_size,
        'ffn_hidden_size': DEFINES.ffn_hidden_size,
        'attention_head_size': DEFINES.attention_head_size,
        'learning_rate': DEFINES.learning_rate,
        'vocabulary_length': vocabulary_length,
        'layer_size': DEFINES.layer_size,
        'max_sequence_length': DEFINES.max_sequence_length,
    })
    
print("\n# 채팅 준비중 입니다...")
estimator_predictor = tf.contrib.predictor.from_estimator(classifier, serving_input_fn)
#요걸 쓰면 에스티메이터 써도 매번불러오지 않음

# 모델 빌드, checkpoint 로드를 위해 한 번의 dummy 예측을 수행한다.
print("# 모델을 빌드하고 checkpoint를 로드하고 있습니다...")
predic_input_enc = data.data_processing(["dummy"], char2idx, DEFINES.enc_input)
predic_output_dec = data.data_processing([""], char2idx, DEFINES.dec_input)
predictions = estimator_predictor({"input": predic_input_enc, "output": predic_output_dec})
print("# 채팅 준비가 완료됐습니다.")
print("# 채팅을 종료하려면 'quit'를 입력하세요")
      
for q in range(1000):
    question = input("Q: ")
    if question == 'quit':
        break
    
    predic_input_enc = data.data_processing([question], char2idx, DEFINES.enc_input)
    predic_output_dec = data.data_processing([""], char2idx, DEFINES.dec_input)
    
    predictions = estimator_predictor({"input": predic_input_enc, "output": predic_output_dec})
    sentence_string = [idx2char[index] for index in predictions['indexs'][0]]
Example #13
# This Source Code Form is subject to the terms of the MIT
# License. If a copy of the same was not distributed with this
# file, You can obtain one at
# https://github.com/akhilpandey95/scholarlyimpact/blob/master/LICENSE.

import sys
from models import PredictCitationsExist
from evaluation import evaluate, clf_metrics
from data import data_processing, prepare_X_Y
from sklearn.model_selection import train_test_split

if __name__ == '__main__':
    # load the dataset
    data = data_processing('~/Downloads/sch_impact.csv')

    # prepare the X, Y
    X, Y = prepare_X_Y(data, 'target_exp_1')

    # build the train and test samples
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3)

    # build the model
    classifier = PredictCitationsExist()

    # train the model
    classifier = classifier.train(10, 64, X_train, X_test, Y_train, Y_test, stopping=False)

    # evaluate and print the training stats
    model_evaluation = evaluate(classifier, 'train', x_train=X_train, y_train=Y_train)

    # print training metrics
Example #14
import numpy as np

def pred_indices(preds, beta=1.0):
    # Temperature sampling: scale the log-probabilities by 1/beta,
    # renormalize with a softmax, then draw one index from the result.
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds + 1e-9) / beta
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probs = np.random.multinomial(1, preds, 1)
    return np.argmax(probs)
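
pred_indices performs temperature sampling: dividing the log-probabilities by beta sharpens the distribution for beta < 1 and flattens it toward uniform for beta > 1. A quick illustrative check (the probabilities are made up):

probs = [0.1, 0.2, 0.7]
print(pred_indices(probs, beta=0.1))   # low temperature: almost always 2
print(pred_indices(probs, beta=10.0))  # high temperature: nearly uniform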


# Start the chat loop
for i in range(10):
    question = input("Q: ")
    if question == 'quit':
        break

    predic_input_enc = data.data_processing([question], word2idx,
                                            DEFINES.enc_input)
    predic_output_dec = data.data_processing([""], word2idx, DEFINES.dec_input)
    predic_target_dec = data.data_processing([""], word2idx,
                                             DEFINES.dec_target)

    predictions = classifier.predict(input_fn=lambda: data.input_fn(
        predic_input_enc, predic_output_dec, predic_target_dec, 1, 1
    ))  # Each loop iteration rebuilds the network, applies the checkpoint,
    # and runs the job, which degrades speed.
    # This is an Estimator characteristic that needs improvement.

    # Get the softmax probabilities for the answer sentence.
    prob = np.array([v['indexs'] for v in predictions])
    prob = np.squeeze(prob)

    # Probabilistically sample the answer sentence's word indices.
    words_index = [pred_indices(p, beta=DEFINES.softmax_beta) for p in prob]
Example #15
distance_funcs = {
    'euclidean': euclidean_distance,
    'gaussian': gaussian_kernel_distance,
    'inner_prod': inner_product_distance,
    'cosine_dist': cosine_sim_distance,
}
scaling_classes = {
    'min_max_scale': MinMaxScaler,
    'normalize': NormalizationScaler,
}
from data import data_processing
Xtrain = [[1, 1], [1, 1.25], [1.25, 1], [1.4, 1.75], [1.75, 1.75],
          [1.80, 1.75], [2, 1.75], [1.75, 2.25], [2, 2.5], [2, 3], [2.15, 3],
          [2.45, 3], [2.5, 3], [2.75, 3], [3, 3]]
ytrain = [[0], [0], [0], [0], [1], [1], [1], [1], [1], [2], [2], [2], [2], [2],
          [2]]
_, _, _, _, Xtest, ytest = data_processing()  # only the test split is needed here

best_model, best_k, best_function = model_selection_without_normalization(
    distance_funcs, Xtrain, ytrain, Xtrain, ytrain)
print(pr(best_model, Xtest, ytest))
'''
from utils import Information_Gain, get_branches ,get_amount_cls
features =  [[2,     3,      4,    15,     20],
           [4,     6,      11,   13,     15],
           [2,     5,      10,   13,     20],
           [7,     10,     11,   18,     19],
           [11,    13,     16,   18,     19],
           [3,     5,      11,   15,     18],
           [3,     6,      8,    12,     15],
           [5,     11,     15,   17,     18],
           [2,     3,      4,    12,     18],
Example #16
def test_model_selection():
    Xtrain, ytrain, Xval, yval, Xtest, ytest = data_processing()
    model_selection_without_normalization(distance_funcs, Xtrain.tolist(),
                                          ytrain.tolist(), Xval.tolist(),
                                          yval.tolist())
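
A sketch of what such a model-selection routine typically does, assuming the KNN class and f1_score from the snippets above (the actual utils implementation may differ, e.g. in the range of k or tie-breaking):

def model_selection_sketch(distance_funcs, Xtrain, ytrain, Xval, yval):
    best_model, best_k, best_name, best_score = None, None, None, -1.0
    for name, func in distance_funcs.items():
        # Odd k values reduce voting ties.
        for k in range(1, min(len(Xtrain), 30), 2):
            model = KNN(k, func)
            model.train(Xtrain, ytrain)
            score = f1_score(yval, model.predict(Xval))
            if score > best_score:
                best_model, best_k, best_name, best_score = model, k, name, score
    return best_model, best_k, best_name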
Example #17
# This Source Code Form is subject to the terms of the MIT
# License. If a copy of the same was not distributed with this
# file, You can obtain one at
# https://github.com/akhilpandey95/altpred/blob/master/LICENSE.

import sys
from models import *
from evaluation import evaluate, clf_metrics
from data import data_processing, prepare_word_embeddings
from sklearn.model_selection import train_test_split

if __name__ == '__main__':
    # load the dataset
    #data = data_processing('altmetrics_j2014_full_gamma.csv', 'binary')
    data = data_processing('altmetrics_j2014_full_gamma.csv', 'binary-delta')

    # prepare the X, Y
    X, Y, max_words, max_len = prepare_word_embeddings(data, 'abstract',
                                                       'target')

    # build the train and test samples
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.35)

    # build the model
    classifier = AltpredTwitterGRU(max_words, max_len)
    #classifier = AltpredTwitterLSTM(max_words, max_len)
    #classifier = AltpredTwitterBiDirLSTM(max_words, max_len)

    # train the model
    classifier = classifier.train(5, 256, X_train, Y_train, stopping=False)