if __name__ == '__main__':

    import toydoop

    # create some user-item data
    data = {1: [10, 20, 30, 40, 50, 60, 70, 80, 90],
            2: [10, 30, 110, 120, 130, 140, 150],
            3: [20, 30, 40, 90, 120, 160, 170, 180, 190]}
    user_item_counts = dict((k, len(v)) for k, v in data.iteritems())

    datafile = 'bdoopr.in'
    mapout1 = 'bdoopr.map1'
    mapout2 = 'bdoopr.map2'
    outfile = 'bdoopr.out'

    # write the toy data to disk, one user-item pair per line
    f = open(datafile, 'w')
    for user, items in data.iteritems():
        for item in items:
            print >>f, toydoop.default_formatter(user, item)
    f.close()

    # run two stages of mapreduce
    mapper1 = Mapper1(user_item_counts, oversampling=10)
    toydoop.mapreduce(datafile, mapout1, mapper=mapper1, reducer=reducer1)
    toydoop.mapreduce(datafile, mapout2, mapper=mapper2)  # map the data again
    toydoop.mapreduce([mapout1, mapout2], outfile, reducer=reducer2)
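Once the toy run finishes, the easiest sanity check is simply to dump the final output file. The snippet below is just that dump; it assumes nothing about the line format beyond it being plain text with one generated sample per line.

# quick look at the toy run's output
for line in open('bdoopr.out'):
    print line.rstrip()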
import sys

import toydoop
from numpy import loadtxt
from scipy.sparse import coo_matrix

# NOTE: this assumes BPR, BPRArgs and ExternalSchedule (from bpr.py) and the
# Mapper1, mapper2, reducer1, reducer2, parser and formatter defined with the
# mapreduce code are already in scope.

datafile = sys.argv[1]  # one-indexed, whitespace separated
sample_file = datafile + '.samples'
tmp1 = sample_file + '.tmp1'
tmp2 = sample_file + '.tmp2'

print 'reading data...'
data = loadtxt(datafile)

print 'converting to zero-indexed sparse matrix...'
idxs = data[:, :2] - 1
vals = data[:, 2]
data = coo_matrix((vals, idxs.T)).tocsr()
user_item_counts = dict((i + 1, data[i].getnnz()) for i in xrange(data.shape[0]))

print 'creating samples...'
mapper1 = Mapper1(user_item_counts, oversampling=10)
print 'map-red1...'
toydoop.mapreduce(datafile, tmp1, mapper=mapper1, reducer=reducer1, parser=parser)
print 'map2...'
toydoop.mapreduce(datafile, tmp2, mapper=mapper2, parser=parser)  # map the data again
print 'red2...'
toydoop.mapreduce([tmp1, tmp2], sample_file, reducer=reducer2, formatter=formatter)

print 'training...'
args = BPRArgs()
args.learning_rate = 0.3
num_factors = 10
model = BPR(num_factors, args)
sampler = ExternalSchedule(sample_file, index_offset=1)  # schedule is one-indexed
num_iters = 10
model.train(data, sampler, num_iters)
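For reference, the training script expects its single argument to be a whitespace-separated file of one-indexed (user, item, rating) triples, which is what the loadtxt / coo_matrix handling above implies. A minimal sketch of how such a file could be created for a smoke test; the filename 'ratings.tsv' and the values are purely illustrative:

# hypothetical smoke-test input: one-indexed user, item and rating per line,
# whitespace separated, matching what the script above reads with loadtxt
with open('ratings.tsv', 'w') as f:
    for user, item, rating in [(1, 10, 1.0), (1, 20, 1.0),
                               (2, 10, 1.0), (3, 20, 1.0)]:
        print >>f, '%d %d %.1f' % (user, item, rating)

The script would then be run with that file as its only command-line argument.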