Example no. 1
def bi_item(m, tuples, k, func, func_w):
    # with open('u2U.txt', 'rb') as f1:
    #     u2U = pickle.load(f1)
    # with open('m2Ucluster.txt', 'rb') as f4:
    #     m2Ucluster = pickle.load(f4)
    with open('m2M.txt', 'rb') as f1:
        m2M = pickle.load(f1)
    with open('u2Mcluster.txt', 'rb') as f2:
        u2Mcluster = pickle.load(f2)

    res = []  # list of predictions for the query tuples
    # m = np.asarray(m, order='F')  # column-major order
    u2Mcluster = np.asarray(u2Mcluster, order='F')
    simPair = model.similarityPair(u2Mcluster, func)
    for pair in tuples:
        c = findCentroid(m2M, pair[0])          # cluster centroid of the queried movie
        sim = simPair[c]
        temp = model.knn(sim, k)                # indices of the k most similar clusters
        prediction = model.predict(u2Mcluster, temp, sim, func_w)
        pred = prediction[pair[1]] + 3          # add back the rating offset of 3
        pred = min(5, max(1, pred))             # clamp to the valid 1-5 rating range
        res.append(pred)
    return res
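
Note: Examples 1 and 3 rely on a model module that is not shown on this page. As a rough sketch of what its similarityPair and knn helpers might look like (the cosine formula and both signatures are assumptions, not the original code):

import numpy as np

def similarityPair(m, func):
    # Pairwise similarity between all rows of m.
    if func == 'cosine':
        norms = np.linalg.norm(m, axis=1, keepdims=True)
        norms[norms == 0] = 1.0   # guard empty rows against division by zero
        unit = m / norms
        return unit @ unit.T
    return m @ m.T                # 'dotp': plain dot-product similarity

def knn(sim, k):
    # Indices of the k most similar rows, skipping the row itself,
    # which sorts first because it is maximally similar to itself.
    return np.argsort(sim)[::-1][1:k + 1]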
Example no. 2
def predict():
    # Parse the comma-separated skill names and their numeric values
    # from the JSON request body.
    skills = request.json['skill']
    headers = skills.split(',')
    payload = request.json['data']
    values = [float(i) for i in payload.split(',')]

    # Align the query row with the training columns; unknown skills become 0.
    df = data_reader()
    df_new = pd.DataFrame(columns=list(df.columns))
    X = pd.DataFrame([values], columns=headers, dtype=float)
    X_te = pd.concat([df_new, X], axis=0, sort=False)
    X_te.fillna(0, inplace=True)

    X_tr, X_te, X_tr_sc, X_te_sc = preprocess(df, X_te)
    my_model = knn(X_tr_sc, 5, 0.4)
    prediction_results = prediction(my_model, X_tr, X_te, X_tr_sc, X_te_sc)
    # Drop all-zero columns before serialising the result.
    prediction_results = prediction_results.loc[:, (prediction_results != 0).any(axis=0)]
    return prediction_results.to_json(orient='index')
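
Assuming the function above is registered as a Flask view (the route path below is hypothetical, since the decorator is not part of the snippet), it could be exercised like this:

import requests

resp = requests.post(
    'http://localhost:5000/predict',    # hypothetical route
    json={'skill': 'python,sql,excel',  # skill names made up for illustration
          'data': '4.0,3.5,2.0'},
)
print(resp.json())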
Example no. 3
def bi_user(m, tuples, k, func, func_w):
    with open('u2U.txt', 'rb') as f1:
        u2U = pickle.load(f1)
    with open('m2Ucluster.txt', 'rb') as f2:
        m2Ucluster = pickle.load(f2)
    m2Ucluster = m2Ucluster.transpose()

    res = []  # list of predictions for the query tuples
    m2Ucluster = np.asarray(m2Ucluster, order='F')
    simPair = model.similarityPair(m2Ucluster, func)
    for pair in tuples:
        c = findCentroid(u2U, pair[1])          # cluster centroid of the queried user
        sim = simPair[c]
        temp = model.knn(sim, k)                # indices of the k most similar clusters
        prediction = model.predict(m2Ucluster, temp, sim, func_w)
        pred = prediction[pair[0]] + 3          # add back the rating offset of 3
        pred = min(5, max(1, pred))             # clamp to the valid 1-5 rating range
        res.append(pred)
    return res
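
Both bi_item and bi_user resolve a raw id to a cluster through findCentroid, which is also not shown. Assuming m2M and u2U map each movie/user index to its cluster index, a minimal sketch would be:

def findCentroid(mapping, idx):
    # Return the cluster (centroid index) assigned to item or user idx;
    # mapping is assumed to come from the offline clustering step.
    return mapping[idx]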
Example no. 4
print("""\n# =============================================================================
# linear model
# =============================================================================\n""")
m_lin = model.linear_regression(Q)
m_lin.train(LM_selected)
# De-standardise: the model predicts z-scored targets.
y_lin = m_lin.evaluate(T_selected) * std_dataset[-1] + mean_dataset[-1]
e_lin = m_lin.error() * std_dataset[-1]

print('linear error =', e_lin)

print("""\n# =============================================================================
# knn model
# =============================================================================\n""")

my_knn = model.knn()
k_opt, error_array_knn = my_knn.meta_find(LM_selected, vm.default)
error_array_knn = error_array_knn * std_dataset[-1]  # errors back in original units
print("k_opt = {}".format(k_opt))
print("error_array = ")
print(error_array_knn)
my_plt.plt_knn_meta(my_knn, error_array_knn)

my_knn.train(LM_selected)
y_knn = my_knn.evaluate(T_selected) * std_dataset[-1] + mean_dataset[-1]
e_knn = my_knn.error() * std_dataset[-1]

print('knn error =', e_knn)

print("""\n# =============================================================================
# rbfn model
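
Example 4 repeatedly converts z-scored outputs back to original units. A small helper capturing that step (the function name is hypothetical; the formula is simply the inverse of the z-score transform applied above):

def destandardise(y_std, mean, std):
    # Inverse of z-scoring: y_std = (y - mean) / std  =>  y = y_std * std + mean
    return y_std * std + mean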
Example no. 5
        # print(data.shape)
        # print(label.shape)

    B = 9840
    N = 1024
    K = 20

    data, _ = load_data('train')
    print('data type:', type(data))
    print('data shape:', data.shape)
    data = data[:B, :N, :]
    print('data shape:', data.shape)  # (9840, 1024, 3)
    data = torch.from_numpy(data)
    # view() would scramble the coordinates; permute to channels-first (B, 3, N).
    data = data.permute(0, 2, 1).contiguous()

    ne_idx = knn(data, K)
    print('ne_idx shape:', ne_idx.shape)
    #print('ne_idx:\n', ne_idx)
    ne_idx = ne_idx.reshape(B * N * K)
    print('ne_idx:', ne_idx)

    # device = torch.device('cuda')
    # Index of the centre point each neighbour belongs to, repeated K times.
    center_idx = torch.arange(0, N).view(1, N, 1).repeat(B, 1, K).reshape(B * N * K)
    print('center_idx shape:', center_idx.shape)
    print('center_idx:\n', center_idx)

    # Batch index for each (centre, neighbour) pair.
    batch_idx = torch.arange(0, B).view(B, 1, 1).repeat(1, N, K).reshape(B * N * K)
    print('batch_idx shape:', batch_idx.shape)
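
The knn function called above is not part of the snippet. A common way to compute k-nearest-neighbour indices over a (B, C, N) point-cloud tensor, which is what this code appears to expect, is sketched here (an assumption, not the original implementation):

import torch

def knn(x, k):
    # x: (B, C, N); returns (B, N, k) indices of each point's k nearest
    # neighbours under squared Euclidean distance.
    inner = -2 * torch.matmul(x.transpose(2, 1), x)   # (B, N, N)
    xx = torch.sum(x ** 2, dim=1, keepdim=True)       # (B, 1, N)
    neg_dist = -xx - inner - xx.transpose(2, 1)       # negative squared distances
    return neg_dist.topk(k=k, dim=-1)[1]              # nearest = largest neg_dist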
Example no. 6
M = m.toarray()

# Inspect the rating distribution of user 4321.
u4321 = M[4321]

print(u4321.nonzero()[0].size)
print(np.where(u4321 == 1)[0].shape)
print(np.where(u4321 == 3)[0].shape)
print(np.where(u4321 == 5)[0].shape)
print(np.average(u4321[u4321.nonzero()[0]]))

# Inspect the rating distribution of movie 3.
m3 = M[:, 3]

print(m3.nonzero()[0].size)
print(np.where(m3 == 1)[0].shape)
print(np.where(m3 == 3)[0].shape)
print(np.where(m3 == 5)[0].shape)
print(np.average(m3[m3.nonzero()[0]]))

M = readHelper.readTrain('train.csv')

# k nearest neighbours of user 4321 under dot-product and cosine similarity.
sim = model.similarity(M, 4321, 'dotp')
print(model.knn(sim, 5))
sim = model.similarity(M, 4321, 'cosine')
print(model.knn(sim, 5))

# Same for movie 3, on the transposed (movie x user) matrix.
sim = model.similarity(M.transpose(), 3, 'dotp')
print(model.knn(sim, 5))
sim = model.similarity(M.transpose(), 3, 'cosine')
print(model.knn(sim, 5))
Example no. 7
# SVM
print('SVM classifier')
svmmodel = m.svm((xtrain, ytrain), (xtest, ytest))

# Decision Tree
print('Decision Tree')
dtmodel = m.decisionTree((xtrain, ytrain), (xtest, ytest))

# Random Tree
print('Random Tree')
rtmodel = m.randomTree((xtrain, ytrain), (xtest, ytest))

# Logistic Classifier
print('Logistic classifier')
logistimodel = m.logisticClassifer((xtrain, ytrain), (xtest, ytest))

# kNN
print('KNN')
knn = m.knn((xtrain, ytrain), (xtest, ytest))

# AdaBoost
print('Adaboosting')
adaboosting = m.adaboosting((xtrain, ytrain), (xtest, ytest))

# Voting ensemble over all fitted classifiers
print('Voting')
classifiers = [('svm', svmmodel), ('dt', dtmodel), ('rt', rtmodel),
               ('lt', logistimodel), ('knn', knn),
               ('addboosting', adaboosting)]
votingmodel = m.voting((xtrain, ytrain), (xtest, ytest), classifiers)
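
m.voting is not shown on this page. Assuming it wraps scikit-learn's VotingClassifier (both that assumption and the exact signature are guesses), a minimal sketch:

from sklearn.ensemble import VotingClassifier

def voting(train, test, classifiers):
    # classifiers is a list of (name, estimator) pairs, as built above.
    xtrain, ytrain = train
    xtest, ytest = test
    clf = VotingClassifier(estimators=classifiers, voting='hard')
    clf.fit(xtrain, ytrain)   # note: VotingClassifier re-fits each estimator
    print('voting accuracy:', clf.score(xtest, ytest))
    return clf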