예제 #1
0
def train_auto(nb_epoch=10,
               test_p=0.1,
               nb_hunits=10,
               lambda_reg=0.001,
               learningrate=0.01):
    """Fit a single-hidden-layer autoencoder row by row and score each epoch.

    The training matrix returned by ``get_data()`` is transposed and its rows
    are fed, in a fresh random order every epoch, through a sigmoid encoder
    (``V``, ``miu``) and a linear decoder (``W``, ``b``).  After each epoch the
    current reconstruction matrix is scored with ``cal_MAE`` against the test
    ratings.

    Args:
        nb_epoch: Number of passes over the rows.
        test_p: Accepted for interface compatibility; not used in this body.
        nb_hunits: Size of the hidden layer.
        lambda_reg: Weight of the L2 penalty on ``V`` and ``W``.
        learningrate: Step size of the gradient updates.

    Returns:
        A tuple ``(nb_epoch, scores)`` where ``scores`` holds one ``cal_MAE``
        result per epoch.
    """
    (ratings, _, _, test_ratings, _, _,
     nb_users, nb_movies) = get_data()
    ratings = ratings.T
    prediction_M = np.zeros((nb_movies, nb_users), dtype=np.float32)

    # --- symbolic inputs ---------------------------------------------------
    X = T.dvector("input")
    X_observed = T.dvector("observedIndex")
    update_matrix = T.matrix("updateIndex")

    # --- parameters (V is drawn before W, keeping the RNG stream order) ----
    V = theano.shared(np.random.randn(nb_hunits, nb_users), name='V')
    miu = theano.shared(np.zeros(nb_hunits), name='miu')
    W = theano.shared(np.random.randn(nb_users, nb_hunits), name='W')
    b = theano.shared(np.zeros(nb_users), name='b')

    # --- forward pass and objective ----------------------------------------
    hidden = T.nnet.sigmoid(V.dot(X) + miu)
    z2 = W.dot(hidden) + b
    reg_scale = 1.0 / nb_movies * lambda_reg / 2
    loss_reg = reg_scale * (T.sum(T.sqr(V)) + T.sum(T.sqr(W)))
    # The reconstruction error is multiplied by X_observed before squaring.
    loss = T.sum(T.sqr((X - z2) * X_observed)) + loss_reg
    gV, gmiu, gW, gb = T.grad(loss, [V, miu, W, b])

    # Gradients of V, W and b are multiplied element-wise by the mask inputs
    # before the step is applied; miu takes the plain gradient step.
    sgd_updates = [
        (V, V - learningrate * gV * update_matrix),
        (miu, miu - learningrate * gmiu),
        (W, W - learningrate * gW * update_matrix.T),
        (b, b - learningrate * gb * X_observed),
    ]
    train = theano.function(inputs=[X, X_observed, update_matrix],
                            outputs=[z2],
                            updates=sgd_updates)

    epoch_scores = []
    for epoch in range(nb_epoch):
        print("{0} epoch".format(epoch + 1))
        for row in np.random.permutation(nb_movies):
            row_values = ratings[row, :]
            # Mask: entries > 0 become 1, all other entries keep their value.
            row_mask = row_values.copy()
            row_mask[row_values > 0] = 1
            tiled_mask = np.tile(row_mask, (nb_hunits, 1))
            reconstruction = train(row_values, row_mask, tiled_mask)
            prediction_M[row, :] = np.array(reconstruction)
        epoch_scores.append(cal_MAE(prediction_M, test_ratings))

    print("training complete")
    return nb_epoch, epoch_scores
예제 #2
0
	aa = user_movies_matrix[user].tolist()
	for i in range(len(aa)):
		suma+=aa[i]
		counta+=1
	meanadd = suma/counta
	return meanadd	

# files = [1,2,3,4,5]
# Values iterated by the loop below; each one is printed as "K value: <k>".
kvals = [10,20,30,40,50]

for k_index in range(len(kvals)):
	print("\n")
	print("K value: " + str(kvals[k_index]))
	avg_mae=0

	train_matrix, test_matrix, train_grades, test_grades, train_data, test_data, train_matrix_shape0,train_matrix_shape1 = matrix_creation.get_data()
	train_matrix = train_matrix.T
	user_movies_matrix = train_matrix
	matrix_val = cosine_similarity(train_matrix)
	user_similarity_matrix = pd.DataFrame(matrix_val)

	summy = count = rmsesum = 0
	for ind in range(len(test_grades)):
		try:
	# if count>4000:
			count+=1
			user = test_grades[ind][1]
			movie = test_grades[ind][0]
			rating = test_grades[ind][2]

			similar_users_list = user_similarity_matrix[user].tolist()
예제 #3
0
def _read_subj_id_mapping(path):
    """Parse ``name<TAB>id`` lines into ``{id: name}``; the first id wins."""
    mapping = {}
    with open(path, 'r') as handle:
        for line in handle:
            if not line.strip():
                continue  # tolerate blank / trailing lines
            subj_id = int(line.split("\t")[1])
            if subj_id not in mapping:
                mapping[subj_id] = line.strip().split("\t")[0]
    return mapping


def _read_subj_sem_mapping(path):
    """Parse ``id<TAB>[s1,s2,...]`` lines into ``{id: [s1, s2, ...]}``."""
    mapping = {}
    with open(path, 'r') as handle:
        for line in handle:
            if not line.strip():
                continue  # tolerate blank / trailing lines
            subj_id = int(line.split("\t")[0])
            if subj_id not in mapping:
                # Second column is a bracketed list; drop the brackets.
                listed = line.strip().split("\t")[1][1:-1]
                mapping[subj_id] = [
                    int(tok) for tok in listed.split(',') if tok.strip()
                ]
    return mapping


def _scale_to_ten(score, lowest, highest):
    """Map ``score`` from ``[lowest, highest]`` onto ``[0, 10]`` as a float."""
    span = (highest - lowest) * 1.0
    if span == 0:
        # All predictions identical: avoid a NaN/inf that is not valid JSON.
        return 0.0
    # float() keeps the value JSON-serialisable even if it is a NumPy float32.
    return float(abs(10 * (score - lowest) / span))


def _as_record(row):
    """Turn a ``(subj, predicted, actual, error)`` tuple into the output dict."""
    record = {}
    record['subj'] = row[0]
    record['predgrade'] = row[1]
    record['truegrade'] = row[2]
    record['err'] = row[3]
    return record


def train_auto(nb_epoch=100,
               test_p=0.1,
               nb_hunits=10,
               lambda_reg=0.001,
               learningrate=0.01,
               userid=1,
               semester=5):
    """Train the autoencoder and return subject predictions for one user.

    The model is trained for ``nb_epoch`` epochs; the prediction matrix of the
    epoch with the lowest ``cal_MAE`` value is kept.  For the requested user,
    every subject listed for ``semester`` in ``subj_sem_mapping.txt`` gets a
    predicted grade rescaled to ``[0, 10]`` using the minimum and maximum of
    the kept prediction matrix.

    Args:
        nb_epoch: Number of training epochs.
        test_p: Accepted for interface compatibility; not used in this body.
        nb_hunits: Size of the hidden layer.
        lambda_reg: Weight of the L2 penalty on ``V`` and ``W``.
        learningrate: Step size of the gradient updates.
        userid: 1-based user id; row ``userid - 1`` of the predictions is used.
        semester: Only subjects whose semester list contains this value are
            reported.

    Returns:
        A JSON string encoding a list of at most 10 dicts with the keys
        ``subj``, ``predgrade``, ``truegrade`` and ``err``, ordered by
        descending predicted grade: first up to 5 entries whose true grade
        equals 0, then entries with a non-zero true grade.

    Raises:
        KeyError: If a reported subject is missing from ``subj_id_mapping``
            or, as far as this body can tell, from both ``test_data`` and
            ``train_data``.
    """
    (train_M, _, _, test_ratings, train_data, test_data,
     nb_users, nb_items) = get_data()

    subj_id_mapping = _read_subj_id_mapping('subj_id_mapping.txt')
    subj_sem_mapping = _read_subj_sem_mapping('subj_sem_mapping.txt')

    train_M = train_M.T
    prediction_M = np.zeros((nb_items, nb_users), dtype=np.float32)

    # Set up the theano autoencoder structure and update function.
    X = T.dvector("input")
    X_observed = T.dvector("observedIndex")
    update_matrix = T.matrix("updateIndex")
    V = theano.shared(np.random.randn(nb_hunits, nb_users), name='V')
    miu = theano.shared(np.zeros(nb_hunits), name='miu')
    W = theano.shared(np.random.randn(nb_users, nb_hunits), name='W')
    b = theano.shared(np.zeros(nb_users), name='b')
    z1 = T.nnet.sigmoid(V.dot(X) + miu)
    z2 = W.dot(z1) + b
    loss_reg = 1.0 / nb_items * lambda_reg / 2 * (T.sum(T.sqr(V)) +
                                                  T.sum(T.sqr(W)))
    # The reconstruction error is multiplied by X_observed before squaring.
    loss = T.sum(T.sqr((X - z2) * X_observed)) + loss_reg
    gV, gmiu, gW, gb = T.grad(loss, [V, miu, W, b])

    train = theano.function(inputs=[X, X_observed, update_matrix],
                            outputs=[z2],
                            updates=((V,
                                      V - learningrate * gV * update_matrix),
                                     (miu, miu - learningrate * gmiu),
                                     (W,
                                      W - learningrate * gW * update_matrix.T),
                                     (b, b - learningrate * gb * X_observed)))

    best_mae = float('inf')
    best_prediction = None
    for _ in range(nb_epoch):
        for i in np.random.permutation(nb_items):
            Ri = train_M[i, :]
            # Mask: entries > 0 become 1, all other entries keep their value.
            Ri_observed = Ri.copy()
            Ri_observed[Ri > 0] = 1
            update_m = np.tile(Ri_observed, (nb_hunits, 1))
            Ri_predicted = train(Ri, Ri_observed, update_m)
            prediction_M[i, :] = np.array(Ri_predicted)

        # Every prediction, user by user, as an ascending list of scalars.
        ranked = sorted(prediction_M.T.ravel())
        mae = cal_MAE(prediction_M, test_ratings, ranked)
        if mae < best_mae:
            best_mae = mae
            # Snapshot: prediction_M is overwritten in place by later epochs.
            best_prediction = prediction_M.copy()

    if best_prediction is None:
        # No epoch ran, or no MAE compared below infinity.
        best_prediction = prediction_M

    per_user = best_prediction.T
    ranked = sorted(per_user.ravel())

    useri = userid - 1
    user_scores = per_user[useri]
    rows = []
    for subji in range(user_scores.shape[0]):
        if semester not in subj_sem_mapping.get(subji, ()):
            continue
        pg = _scale_to_ten(user_scores[subji], ranked[0], ranked[-1])
        key = (useri, subji)
        # Prefer the held-out grade; otherwise use the training grade.
        ag = test_data[key] if key in test_data else train_data[key]
        rows.append((subj_id_mapping[subji], pg, ag, abs(pg - ag)))

    by_prediction = sorted(rows, key=lambda row: row[1], reverse=True)

    # Up to 5 entries with a true grade of 0, then fill up to 10 in total
    # with entries that have a non-zero true grade.
    zero_grade = [row for row in by_prediction if row[2] == 0][:5]
    nonzero_grade = [row for row in by_prediction
                     if row[2] != 0][:10 - len(zero_grade)]

    return json.dumps([_as_record(row) for row in zero_grade + nonzero_grade])