import numpy as np
import scipy.sparse as sps
# export_data and import_data come from the project's loader module (import path assumed).
from loader import export_data, import_data


def test_import_dense_type_mat():
    """ Test the type after saving and loading a dense array in .mat format. """
    x = np.random.rand(3, 2)
    export_data('/tmp/test.mat', x)
    assert x.dtype == import_data('/tmp/test.mat').dtype
def test_import_sparse_values_mat():
    """ Test values after saving and loading a sparse matrix in .mat format. """
    x = sps.csr_matrix(np.random.rand(3, 2))
    export_data('/tmp/test.mat', x)
    assert np.array_equal(x.toarray(), import_data('/tmp/test.mat').toarray())
def test_import_dense_values_mat():
    """ Test values after saving and loading a dense array in .mat format. """
    x = np.random.rand(3, 2)
    export_data('/tmp/test.mat', x)
    assert np.array_equal(x, import_data('/tmp/test.mat'))
def test_import_type_sparsetxt():
    """ Test the type after saving and loading a sparse matrix in .sparsetxt format. """
    x = sps.csr_matrix(np.random.rand(3, 2))
    export_data('/tmp/test.sparsetxt', x)
    assert x.dtype == import_data('/tmp/test.sparsetxt').dtype
def test_import_type_densetxt():
    """ Test the type after saving and loading a dense array in .densetxt format. """
    x = np.random.rand(7, 11)
    export_data('/tmp/test.densetxt', x)
    assert x.dtype == import_data('/tmp/test.densetxt').dtype
def test_import_values_dense():
    """ Test values after saving and loading of .dense format. """
    x = np.random.rand(7, 11)
    export_data('/tmp/test.dense', x)
    assert np.array_equal(x, import_data('/tmp/test.dense'))
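# A hedged aside, not part of the original suite: the same round-trip pattern
# using a temporary directory instead of the hard-coded /tmp paths above, so
# the check does not depend on a writable /tmp. Only the calls already shown
# (export_data / import_data dispatching on the file extension) are assumed.
import os
import tempfile


def test_roundtrip_mat_tempdir():
    """ Round-trip a dense array through .mat format in a temporary directory. """
    x = np.random.rand(3, 2)
    path = os.path.join(tempfile.mkdtemp(), 'test.mat')
    export_data(path, x)
    y = import_data(path)
    assert x.dtype == y.dtype
    assert np.array_equal(x, y)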
# age==================================
ages.append(float(line[1]))
# sex==================================
if line[2] == 'M':
    sexes.append(1)
else:
    sexes.append(0)
# occupation==================================
occupations.append(occmap[line[3]])
# first three zip code digits: section center facility (regional mail sorting centers)
if line[4][0:3].isdigit():
    zipcodes.append(int(line[4][0:3]))
else:
    zipcodes.append(0)

loader.export_data(args.outpath + 'features_sex.index',
                   loader.HotIndex(numpy.array(sexes), 2))
loader.export_data(args.outpath + 'features_occ.index',
                   loader.HotIndex(numpy.array(occupations), len(occlist) - 1))
loader.export_data(args.outpath + 'features_zip.index',
                   loader.HotIndex(numpy.array(zipcodes), 1000))
loader.export_data(args.outpath + 'features_age.mat', numpy.array(ages))

# inspect processed data
sex = loader.imatload(args.outpath + 'features_sex.index')
print('sex:')
print(sex.vec.shape, sex.dim)
print(sex.vec[0:10])
print('\n')
occ = loader.imatload(args.outpath + 'features_occ.index')
print('occ:')
print(occ.vec.shape, occ.dim)
print(occ.vec[0:10])
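# A hedged sketch (illustrative helper, not part of loader): expanding a
# HotIndex-style pair of an index vector and a dimension into a dense one-hot
# matrix. The .vec / .dim attribute names follow the inspection prints above;
# the helper and its name are assumptions for illustration only.
import numpy


def hotindex_to_onehot(index_vec, dim):
    """ Return a (len(index_vec), dim) 0/1 matrix with a single 1 per row. """
    index_vec = numpy.asarray(index_vec, dtype=int)
    onehot = numpy.zeros((len(index_vec), dim))
    onehot[numpy.arange(len(index_vec)), index_vec] = 1.0
    return onehot

# e.g. hotindex_to_onehot(sex.vec, sex.dim) gives a users-by-2 binary matrix.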
print(len(order))
random.shuffle(order)
sparse = sparse[order, :]
# time = loader.import_data(args.readpath + 'features_time.densetxt')
# time = time[order]
# dev and test each get 2.5% of the shuffled ratings; train gets the next 500,000 rows
split_size = int(round(num_ratings * .025))
num_users = sparse[:, 0].max()
num_items = sparse[:, 1].max()
# user and item ids are 1-based in the ratings file; shift to 0-based indices
rows = sparse[:, 0] - 1
cols = sparse[:, 1] - 1
vals = sparse[:, 2]
loader.export_data(args.writepath + '500k/dev/features_user.index',
                   loader.HotIndex(rows[0:split_size], num_users))
loader.export_data(args.writepath + '500k/dev/features_item.index',
                   loader.HotIndex(cols[0:split_size], num_items))
loader.export_data(args.writepath + '500k/dev/labels_ratings.mat', vals[0:split_size])
loader.export_data(args.writepath + '500k/test/features_user.index',
                   loader.HotIndex(rows[split_size:2 * split_size], num_users))
loader.export_data(args.writepath + '500k/test/features_item.index',
                   loader.HotIndex(cols[split_size:2 * split_size], num_items))
loader.export_data(args.writepath + '500k/test/labels_ratings.mat',
                   vals[split_size:2 * split_size])
loader.export_data(args.writepath + '500k/train/features_user.index',
                   loader.HotIndex(rows[2 * split_size:2 * split_size + 500000], num_users))
first = False
lines = infile.readlines()
for line in lines:
    line = line.split('|')
    # month released==================================
    date = line[2].split('-')
    month.append(monthmap[date[1]])
    # year released==================================
    year.append(float(int(date[2])))
    genres = line[5:len(line)]
    for i in range(len(genres)):
        genres[i] = float(genres[i])
    genre.append(genres)

# print(month_matrix.vec.shape, month_matrix.dim, genre_matrix.vec.shape, genre_matrix.dim, year_matrix.shape)
loader.export_data(args.outpath + 'features_month.index',
                   loader.HotIndex(numpy.array(month), 12))
loader.export_data(args.outpath + 'features_year.mat', numpy.array(year))
loader.export_data(args.outpath + 'features_genre.mat', numpy.array(genre))

# inspect processed data
month = loader.import_data(args.outpath + 'features_month.index')
print('month:')
print(month.vec.shape, month.dim)
print(month.vec[0:20])
print('\n')
year = loader.import_data(args.outpath + 'features_year.mat')
print('year:')
print(year.shape)
print(year[0:10])
print('\n')
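# A hedged sketch of the monthmap lookup assumed above: the ml100k item file
# stores release dates like "01-Jan-1995", so date[1] is a three-letter month
# abbreviation and date[2] is the year. The actual table is defined elsewhere
# in the script; this mapping onto indices 0..11 (matching the HotIndex
# dimension of 12) is illustrative only.
monthmap = {abbr: i for i, abbr in enumerate(
    ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
     'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'])}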
print(len(order))
random.shuffle(order)
sparse = sparse[order, :]
# time = loader.import_data(args.readpath + 'features_time.densetxt')
# time = time[order]
# dev and test each get 10% of the shuffled ratings; train gets the rest
split_size = int(round(num_ratings * .1))
num_users = sparse[:, 0].max()
num_items = sparse[:, 1].max()
# user and item ids are 1-based in the ratings file; shift to 0-based indices
rows = sparse[:, 0] - 1
cols = sparse[:, 1] - 1
vals = sparse[:, 2]
# loader.export_data(args.writepath + 'dev/features_time.mat', time[0:split_size])
loader.export_data(args.writepath + 'dev/features_user.index',
                   loader.HotIndex(rows[0:split_size], num_users))
loader.export_data(args.writepath + 'dev/features_item.index',
                   loader.HotIndex(cols[0:split_size], num_items))
loader.export_data(args.writepath + 'dev/labels_ratings.mat', vals[0:split_size])
# loader.export_data(args.writepath + 'test/features_time.mat', time[split_size:split_size*2])
loader.export_data(args.writepath + 'test/features_user.index',
                   loader.HotIndex(rows[split_size:split_size * 2], num_users))
loader.export_data(args.writepath + 'test/features_item.index',
                   loader.HotIndex(cols[split_size:split_size * 2], num_items))
loader.export_data(args.writepath + 'test/labels_ratings.mat', vals[split_size:split_size * 2])
# loader.export_data(args.writepath + 'train/features_time.mat', time[split_size*2:time.shape[0]])
loader.export_data(args.writepath + 'train/features_user.index',
                   loader.HotIndex(rows[split_size * 2:sparse.shape[0]], num_users))
loader.export_data(args.writepath + 'train/features_item.index',
                   loader.HotIndex(cols[split_size * 2:sparse.shape[0]], num_items))
loader.export_data(args.writepath + 'train/labels_ratings.mat', vals[split_size * 2:sparse.shape[0]])
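# A hedged sketch, factoring the slice arithmetic above (and in the 500k
# script) into one helper so the dev/test/train boundaries are visible in one
# place. The function name and signature are illustrative, not part of loader;
# note the 500k script additionally caps its training slice at 500,000 rows.
def three_way_split(num_rows, split_frac):
    """ Return (dev, test, train) slices: two held-out sets of
    round(num_rows * split_frac) rows each, then the remainder for training. """
    split_size = int(round(num_rows * split_frac))
    return (slice(0, split_size),
            slice(split_size, 2 * split_size),
            slice(2 * split_size, num_rows))

# e.g. dev, test, train = three_way_split(sparse.shape[0], .1)
#      loader.HotIndex(rows[dev], num_users), vals[train], etc.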
words = loader.import_data('/home/aarontuor/data/ml100k/item/features_bin_doc_term.mat')
genre_dist = genres / (norm(genres, axis=1, keepdims=True) * norm(genres, axis=1, keepdims=True))
devgenre = genres[dev_item.vec]
testgenre = genres[test_item.vec]
traingenre = genres[train_item.vec]
devwords = words[dev_item.vec]
testwords = words[test_item.vec]
trainwords = words[train_item.vec]
print(devgenre.shape)
print(testgenre.shape)
print(traingenre.shape)
print(devwords.shape)
print(testwords.shape)
print(trainwords.shape)
loader.export_data('/home/aarontuor/data/ml100k/dev/labels_genre.mat', devgenre)
loader.export_data('/home/aarontuor/data/ml100k/train/labels_genre.mat', traingenre)
loader.export_data('/home/aarontuor/data/ml100k/test/labels_genre.mat', testgenre)
loader.export_data('/home/aarontuor/data/ml100k/dev/features_words.mat', devwords)
loader.export_data('/home/aarontuor/data/ml100k/train/features_words.mat', trainwords)
loader.export_data('/home/aarontuor/data/ml100k/test/features_words.mat', testwords)
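# A hedged aside: genre_dist above divides each row by the product of two
# identical norms, i.e. the squared L2 norm, and is not used later in this
# snippet. If unit-L2-normalized genre rows are the intent, a sketch of that
# normalization (illustrative only, not a change to the script above) is:
from numpy.linalg import norm  # assumed to be the norm used above


def l2_normalize_rows(m):
    """ Scale each row of m to unit L2 norm; all-zero rows are left unchanged. """
    norms = norm(m, axis=1, keepdims=True)
    norms[norms == 0] = 1.0
    return m / norms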