# Sample the user ratings from the input files.
#
# NOTE(review): `np` and `get_random` are used here but are not imported
# anywhere above this point in the visible file -- confirm the imports exist.
TOTAL_RATINGS = 664823  # rows copied out of data/ratings_data.txt
TRAIN_RATINGS = 581720  # rows sampled for training; the other 83103 are test

# names=True consumes the first line as column names, so `rawdata` is a
# structured array whose fields are addressed by name rather than by column.
rawdata = np.genfromtxt("data/ratings_data.txt",
                        delimiter=' ',
                        dtype=None,
                        names=True)

# Copy the first three fields into a plain float matrix, one whole column at
# a time instead of one Python-level assignment per cell.
data = np.zeros((TOTAL_RATINGS, 3))
field_names = rawdata.dtype.names
for col in range(3):
    data[:, col] = rawdata[field_names[col]][:TOTAL_RATINGS]

# Row indices chosen for the training split.
# Assumes get_random(n, k) returns k distinct integer indices in [0, n)
# -- TODO confirm against the ReservoirSample module.
sampled_user_item_data = np.array(get_random(TOTAL_RATINGS, TRAIN_RATINGS))

# Boolean membership mask: True marks a training row, False a test row.
ind = np.zeros(TOTAL_RATINGS, dtype=bool)
ind[sampled_user_item_data] = True
sample_user_item_training = data[ind]
sample_user_item_test = data[~ind]

# Single-argument print(...) is valid on both Python 2 and Python 3.
print(sample_user_item_training[0])
print(sample_user_item_test[0])

# Saving the training and the test splits into npy files
np.save("training", sample_user_item_training)
np.save("test", sample_user_item_test)
# Example no. 2
# 0
from ReservoirSample import get_random
import numpy as np

# Sample the user ratings from the input files.
# Every value is parsed as an integer (dtype=int).
data = np.genfromtxt("data/trimmed_items_ratings.txt",
                     delimiter=' ',
                     dtype=int)

# 80/20 split: the test share is rounded down, training gets the remainder.
datasize = data.shape[0]
testsize = int(.2 * datasize)
trainsize = datasize - testsize

# generating training & test indices using reservoir sampling
# Assumes get_random(n, k) returns k distinct integer indices in [0, n)
# -- TODO confirm against the ReservoirSample module.
sampled_user_item_data = np.array(get_random(datasize, trainsize))

# Boolean membership mask: True marks a training row, False a test row.
ind = np.zeros(datasize, dtype=bool)
ind[sampled_user_item_data] = True
sample_user_item_training = data[ind]
sample_user_item_test = data[~ind]

# writing the trimmed training data
# Open the file once in 'w' mode (which also truncates it) rather than
# reopening it in append mode for every row. The slice keeps the original
# upper bound of `trainsize` rows without indexing past the end of the array.
with open('trimmed_training.txt', 'w') as f:
    f.writelines(
        "%s %s %s\n" % (row[0], row[1], row[2])
        for row in sample_user_item_training[:trainsize]
    )
import numpy as np

# Sample the user ratings from the input files.
#
# NOTE(review): `get_random` is used here but is not imported anywhere above
# this point in the visible file -- confirm the import exists.
TOTAL_RATINGS = 664823  # rows copied out of data/ratings_data.txt
TRAIN_RATINGS = 581720  # rows sampled for training; the other 83103 are test

# names=True consumes the first line as column names, so `rawdata` is a
# structured array whose fields are addressed by name rather than by column.
rawdata = np.genfromtxt("data/ratings_data.txt",
                        delimiter=' ',
                        dtype=None,
                        names=True)

# Copy the first three fields into a plain float matrix, one whole column at
# a time instead of one Python-level assignment per cell.
data = np.zeros((TOTAL_RATINGS, 3))
field_names = rawdata.dtype.names
for col in range(3):
    data[:, col] = rawdata[field_names[col]][:TOTAL_RATINGS]

# Row indices chosen for the training split.
# Assumes get_random(n, k) returns k distinct integer indices in [0, n)
# -- TODO confirm against the ReservoirSample module.
sampled_user_item_data = np.array(get_random(TOTAL_RATINGS, TRAIN_RATINGS))

# Boolean membership mask: True marks a training row, False a test row.
ind = np.zeros(TOTAL_RATINGS, dtype=bool)
ind[sampled_user_item_data] = True
sample_user_item_training = data[ind]
sample_user_item_test = data[~ind]

# Single-argument print(...) is valid on both Python 2 and Python 3.
print(sample_user_item_training[0])
print(sample_user_item_test[0])

# Saving the training and the test splits into npy files
np.save("training", sample_user_item_training)
np.save("test", sample_user_item_test)

from ReservoirSample import get_random
import numpy as np


# Sample the user ratings from the input files.
# Every value is parsed as an integer (dtype=int).
data = np.genfromtxt("data/trimmed_items_ratings.txt",
                     delimiter=' ',
                     dtype=int)

# 80/20 split: the test share is rounded down, training gets the remainder.
datasize = data.shape[0]
testsize = int(.2 * datasize)
trainsize = datasize - testsize

# generating training & test indices using reservoir sampling
# Assumes get_random(n, k) returns k distinct integer indices in [0, n)
# -- TODO confirm against the ReservoirSample module.
sampled_user_item_data = np.array(get_random(datasize, trainsize))

# Boolean membership mask: True marks a training row, False a test row.
ind = np.zeros(datasize, dtype=bool)
ind[sampled_user_item_data] = True
sample_user_item_training = data[ind]
sample_user_item_test = data[~ind]


# writing the trimmed training data
# Open the file once in 'w' mode (which also truncates it) rather than
# reopening it in append mode for every row. The slice keeps the original
# upper bound of `trainsize` rows without indexing past the end of the array.
with open('trimmed_training.txt', 'w') as f:
    f.writelines(
        "%s %s %s\n" % (row[0], row[1], row[2])
        for row in sample_user_item_training[:trainsize]
    )

# writing the trimmed test data
# Create/empty the output file up front. NOTE(review): the code that fills it
# is not visible in this chunk -- presumably it appends rows afterwards.
open('trimmed_test.txt', 'w').close()