# Example 1

if __name__ == '__main__':

    import toydoop

    # Create some toy user-item data: user id -> list of item ids.
    data = {
        1: [10, 20, 30, 40, 50, 60, 70, 80, 90],
        2: [10, 30, 110, 120, 130, 140, 150],
        3: [20, 30, 40, 90, 120, 160, 170, 180, 190]
    }
    # Number of items per user, passed to the sampling mapper below.
    user_item_counts = {user: len(items) for user, items in data.items()}

    datafile = 'bdoopr.in'
    mapout1 = 'bdoopr.map1'
    mapout2 = 'bdoopr.map2'
    outfile = 'bdoopr.out'

    # Write one formatted (user, item) record per line; `with` guarantees
    # the file is closed even if formatting raises.
    with open(datafile, 'w') as f:
        for user, items in data.items():
            for item in items:
                print(toydoop.default_formatter(user, item), file=f)

    # Run two stages of mapreduce: first sample with Mapper1/reducer1,
    # then map the data again and join both outputs with reducer2.
    mapper1 = Mapper1(user_item_counts, oversampling=10)
    toydoop.mapreduce(datafile, mapout1, mapper=mapper1, reducer=reducer1)
    toydoop.mapreduce(datafile, mapout2, mapper=mapper2)  # map the data again
    toydoop.mapreduce([mapout1, mapout2], outfile, reducer=reducer2)
# Example 2
datafile = sys.argv[1]  # one-indexed, whitespace separated
sample_file = datafile + '.samples'
tmp1 = sample_file + '.tmp1'
tmp2 = sample_file + '.tmp2'

print('reading data...')
data = loadtxt(datafile)
print('converting to zero-indexed sparse matrix...')
idxs = data[:, :2] - 1  # shift one-indexed (user, item) ids to zero-indexed
vals = data[:, 2]
data = coo_matrix((vals, idxs.T)).tocsr()
# Number of stored entries per (one-indexed) user row.
user_item_counts = {i + 1: data[i].getnnz() for i in range(data.shape[0])}

print('creating samples...')
mapper1 = Mapper1(user_item_counts, oversampling=10)
print('map-red1...')
toydoop.mapreduce(datafile, tmp1, mapper=mapper1, reducer=reducer1, parser=parser)
print('map2...')
toydoop.mapreduce(datafile, tmp2, mapper=mapper2, parser=parser)  # map the data again
print('red2...')
toydoop.mapreduce([tmp1, tmp2], sample_file, reducer=reducer2, formatter=formatter)

print('training...')
args = BPRArgs()
args.learning_rate = 0.3
num_factors = 10
model = BPR(num_factors, args)
sampler = ExternalSchedule(sample_file, index_offset=1)  # schedule is one-indexed
num_iters = 10
model.train(data, sampler, num_iters)
# Example 3
print('reading data...')
data = loadtxt(datafile)
print('converting to zero-indexed sparse matrix...')
idxs = data[:, :2] - 1  # shift one-indexed (user, item) ids to zero-indexed
vals = data[:, 2]
data = coo_matrix((vals, idxs.T)).tocsr()
# Number of stored entries per (one-indexed) user row.
user_item_counts = {i + 1: data[i].getnnz() for i in range(data.shape[0])}

print('creating samples...')
mapper1 = Mapper1(user_item_counts, oversampling=10)
print('map-red1...')
toydoop.mapreduce(datafile,
                  tmp1,
                  mapper=mapper1,
                  reducer=reducer1,
                  parser=parser)
print('map2...')
toydoop.mapreduce(datafile, tmp2, mapper=mapper2,
                  parser=parser)  # map the data again
print('red2...')
toydoop.mapreduce([tmp1, tmp2],
                  sample_file,
                  reducer=reducer2,
                  formatter=formatter)

print('training...')
args = BPRArgs()
args.learning_rate = 0.3
num_factors = 10
# Example 4
            yield user,(i,j)

if __name__ == '__main__':

    import toydoop

    # Create some toy user-item data: user id -> list of item ids.
    data = {
        1: [10, 20, 30, 40, 50, 60, 70, 80, 90],
        2: [10, 30, 110, 120, 130, 140, 150],
        3: [20, 30, 40, 90, 120, 160, 170, 180, 190]
    }
    # Number of items per user, passed to the sampling mapper below.
    user_item_counts = {user: len(items) for user, items in data.items()}

    datafile = 'bdoopr.in'
    mapout1 = 'bdoopr.map1'
    mapout2 = 'bdoopr.map2'
    outfile = 'bdoopr.out'

    # Write one formatted (user, item) record per line; `with` guarantees
    # the file is closed even if formatting raises.
    with open(datafile, 'w') as f:
        for user, items in data.items():
            for item in items:
                print(toydoop.default_formatter(user, item), file=f)

    # Run two stages of mapreduce: first sample with Mapper1/reducer1,
    # then map the data again and join both outputs with reducer2.
    mapper1 = Mapper1(user_item_counts, oversampling=10)
    toydoop.mapreduce(datafile, mapout1, mapper=mapper1, reducer=reducer1)
    toydoop.mapreduce(datafile, mapout2, mapper=mapper2)  # map the data again
    toydoop.mapreduce([mapout1, mapout2], outfile, reducer=reducer2)